cocoindex-code 0.2.10__tar.gz → 0.2.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/PKG-INFO +3 -1
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/README.md +2 -0
- cocoindex_code-0.2.11/src/cocoindex_code/_version.py +24 -0
- cocoindex_code-0.2.11/src/cocoindex_code/litellm_embedder.py +126 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/settings.py +5 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/shared.py +16 -3
- cocoindex_code-0.2.10/src/cocoindex_code/_version.py +0 -34
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/.gitignore +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/LICENSE +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/pyproject.toml +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/__init__.py +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/__main__.py +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/chunking.py +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/cli.py +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/client.py +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/daemon.py +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/indexer.py +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/project.py +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/protocol.py +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/query.py +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/schema.py +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/server.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cocoindex-code
|
|
3
|
-
Version: 0.2.10
|
|
3
|
+
Version: 0.2.11
|
|
4
4
|
Summary: MCP server for indexing and querying codebases using CocoIndex
|
|
5
5
|
Project-URL: Homepage, https://github.com/cocoindex-io/cocoindex-code
|
|
6
6
|
Project-URL: Repository, https://github.com/cocoindex-io/cocoindex-code
|
|
@@ -345,6 +345,7 @@ embedding:
|
|
|
345
345
|
provider: sentence-transformers # or "litellm"
|
|
346
346
|
model: sentence-transformers/all-MiniLM-L6-v2
|
|
347
347
|
device: mps # optional: cpu, cuda, mps (auto-detected if omitted)
|
|
348
|
+
min_interval_ms: 300 # optional: pace LiteLLM embedding requests to reduce 429s; defaults to 5 for LiteLLM
|
|
348
349
|
|
|
349
350
|
envs: # extra environment variables for the daemon
|
|
350
351
|
OPENAI_API_KEY: your-key # only needed if not already in your shell environment
|
|
@@ -436,6 +437,7 @@ Set `OLLAMA_API_BASE` in `envs:` if your Ollama server is not at `http://localho
|
|
|
436
437
|
```yaml
|
|
437
438
|
embedding:
|
|
438
439
|
model: text-embedding-3-small
|
|
440
|
+
min_interval_ms: 300 # optional: override the 5ms LiteLLM default
|
|
439
441
|
envs:
|
|
440
442
|
OPENAI_API_KEY: your-api-key
|
|
441
443
|
```
|
|
@@ -306,6 +306,7 @@ embedding:
|
|
|
306
306
|
provider: sentence-transformers # or "litellm"
|
|
307
307
|
model: sentence-transformers/all-MiniLM-L6-v2
|
|
308
308
|
device: mps # optional: cpu, cuda, mps (auto-detected if omitted)
|
|
309
|
+
min_interval_ms: 300 # optional: pace LiteLLM embedding requests to reduce 429s; defaults to 5 for LiteLLM
|
|
309
310
|
|
|
310
311
|
envs: # extra environment variables for the daemon
|
|
311
312
|
OPENAI_API_KEY: your-key # only needed if not already in your shell environment
|
|
@@ -397,6 +398,7 @@ Set `OLLAMA_API_BASE` in `envs:` if your Ollama server is not at `http://localho
|
|
|
397
398
|
```yaml
|
|
398
399
|
embedding:
|
|
399
400
|
model: text-embedding-3-small
|
|
401
|
+
min_interval_ms: 300 # optional: override the 5ms LiteLLM default
|
|
400
402
|
envs:
|
|
401
403
|
OPENAI_API_KEY: your-api-key
|
|
402
404
|
```
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# file generated by vcs-versioning
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
__all__ = [
|
|
6
|
+
"__version__",
|
|
7
|
+
"__version_tuple__",
|
|
8
|
+
"version",
|
|
9
|
+
"version_tuple",
|
|
10
|
+
"__commit_id__",
|
|
11
|
+
"commit_id",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
version: str
|
|
15
|
+
__version__: str
|
|
16
|
+
__version_tuple__: tuple[int | str, ...]
|
|
17
|
+
version_tuple: tuple[int | str, ...]
|
|
18
|
+
commit_id: str | None
|
|
19
|
+
__commit_id__: str | None
|
|
20
|
+
|
|
21
|
+
__version__ = version = '0.2.11'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 2, 11)
|
|
23
|
+
|
|
24
|
+
__commit_id__ = commit_id = None
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""LiteLLM embedder with optional request pacing and rate-limit retries."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
import re
|
|
8
|
+
import time
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
import cocoindex as coco
|
|
12
|
+
import numpy as np
|
|
13
|
+
from cocoindex.ops.litellm import LiteLLMEmbedder, litellm
|
|
14
|
+
from numpy.typing import NDArray
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
_RATE_LIMIT_DELAY_RE = re.compile(r"Please try again in ([0-9.]+)(ms|s)", re.IGNORECASE)
|
|
19
|
+
_MAX_RATE_LIMIT_RETRIES = 6
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _get_rate_limit_delay(exc: Exception, attempt: int) -> float | None:
|
|
23
|
+
message = str(exc)
|
|
24
|
+
if "rate limit" not in message.lower():
|
|
25
|
+
return None
|
|
26
|
+
|
|
27
|
+
match = _RATE_LIMIT_DELAY_RE.search(message)
|
|
28
|
+
if match is not None:
|
|
29
|
+
value = float(match.group(1))
|
|
30
|
+
unit = match.group(2).lower()
|
|
31
|
+
delay = value / 1000.0 if unit == "ms" else value
|
|
32
|
+
else:
|
|
33
|
+
delay = min(0.5 * (2**attempt), 10.0)
|
|
34
|
+
|
|
35
|
+
return min(delay + 0.1, 10.0)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class PacedLiteLLMEmbedder(LiteLLMEmbedder):
|
|
39
|
+
"""LiteLLM embedder that serializes requests and paces them when configured."""
|
|
40
|
+
|
|
41
|
+
def __init__(self, model: str, *, min_interval_ms: int | None = None, **kwargs: Any) -> None:
|
|
42
|
+
super().__init__(model, **kwargs)
|
|
43
|
+
self._min_request_interval_seconds = max(0.0, float(min_interval_ms or 0) / 1000.0)
|
|
44
|
+
self._request_lock: asyncio.Lock | None = None
|
|
45
|
+
self._next_request_at: float = 0.0
|
|
46
|
+
|
|
47
|
+
def _get_request_lock(self) -> asyncio.Lock:
|
|
48
|
+
if self._request_lock is None:
|
|
49
|
+
self._request_lock = asyncio.Lock()
|
|
50
|
+
return self._request_lock
|
|
51
|
+
|
|
52
|
+
async def _aembedding_with_rate_limit_retries(
|
|
53
|
+
self, *, model: str, input: list[str], **kwargs: Any
|
|
54
|
+
) -> Any:
|
|
55
|
+
last_exc: Exception | None = None
|
|
56
|
+
|
|
57
|
+
for attempt in range(_MAX_RATE_LIMIT_RETRIES):
|
|
58
|
+
try:
|
|
59
|
+
return await litellm.aembedding(model=model, input=input, **kwargs)
|
|
60
|
+
except Exception as exc: # noqa: BLE001
|
|
61
|
+
delay = _get_rate_limit_delay(exc, attempt)
|
|
62
|
+
last_exc = exc
|
|
63
|
+
if delay is None or attempt == _MAX_RATE_LIMIT_RETRIES - 1:
|
|
64
|
+
raise
|
|
65
|
+
|
|
66
|
+
logger.warning(
|
|
67
|
+
"Embedding rate limited for model %s, retrying in %.3fs (attempt %d/%d)",
|
|
68
|
+
model,
|
|
69
|
+
delay,
|
|
70
|
+
attempt + 1,
|
|
71
|
+
_MAX_RATE_LIMIT_RETRIES,
|
|
72
|
+
)
|
|
73
|
+
await asyncio.sleep(delay)
|
|
74
|
+
|
|
75
|
+
assert last_exc is not None
|
|
76
|
+
raise last_exc
|
|
77
|
+
|
|
78
|
+
async def run_embedding_request(self, *, input: list[str], **kwargs: Any) -> Any:
|
|
79
|
+
lock = self._get_request_lock()
|
|
80
|
+
async with lock:
|
|
81
|
+
now = time.monotonic()
|
|
82
|
+
if self._next_request_at > now:
|
|
83
|
+
await asyncio.sleep(self._next_request_at - now)
|
|
84
|
+
|
|
85
|
+
response = await self._aembedding_with_rate_limit_retries(
|
|
86
|
+
model=self._model,
|
|
87
|
+
input=input,
|
|
88
|
+
**kwargs,
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
now = time.monotonic()
|
|
92
|
+
if self._min_request_interval_seconds > 0:
|
|
93
|
+
self._next_request_at = now + self._min_request_interval_seconds
|
|
94
|
+
else:
|
|
95
|
+
self._next_request_at = now
|
|
96
|
+
|
|
97
|
+
return response
|
|
98
|
+
|
|
99
|
+
async def _get_dim(self) -> int:
|
|
100
|
+
if self._dim is not None:
|
|
101
|
+
return self._dim
|
|
102
|
+
async with self._get_lock():
|
|
103
|
+
if self._dim is not None:
|
|
104
|
+
return self._dim
|
|
105
|
+
response = await self.run_embedding_request(input=["hello"], **self._kwargs)
|
|
106
|
+
embedding = response.data[0]["embedding"]
|
|
107
|
+
self._dim = len(embedding)
|
|
108
|
+
return self._dim
|
|
109
|
+
|
|
110
|
+
@coco.fn.as_async(
|
|
111
|
+
batching=True,
|
|
112
|
+
max_batch_size=64,
|
|
113
|
+
memo=True,
|
|
114
|
+
version=1,
|
|
115
|
+
logic_tracking="self",
|
|
116
|
+
)
|
|
117
|
+
async def embed(
|
|
118
|
+
self,
|
|
119
|
+
texts: list[str],
|
|
120
|
+
input_type: str | None = None,
|
|
121
|
+
) -> list[NDArray[np.float32]]:
|
|
122
|
+
kwargs = dict(self._kwargs)
|
|
123
|
+
if input_type is not None:
|
|
124
|
+
kwargs["input_type"] = input_type
|
|
125
|
+
response = await self.run_embedding_request(input=texts, **kwargs)
|
|
126
|
+
return [np.array(item["embedding"], dtype=np.float32) for item in response.data]
|
|
@@ -92,6 +92,7 @@ class EmbeddingSettings:
|
|
|
92
92
|
model: str
|
|
93
93
|
provider: str = "litellm"
|
|
94
94
|
device: str | None = None
|
|
95
|
+
min_interval_ms: int | None = None
|
|
95
96
|
|
|
96
97
|
|
|
97
98
|
@dataclass
|
|
@@ -351,6 +352,8 @@ def _user_settings_to_dict(settings: UserSettings) -> dict[str, Any]:
|
|
|
351
352
|
}
|
|
352
353
|
if settings.embedding.device is not None:
|
|
353
354
|
emb["device"] = settings.embedding.device
|
|
355
|
+
if settings.embedding.min_interval_ms is not None:
|
|
356
|
+
emb["min_interval_ms"] = settings.embedding.min_interval_ms
|
|
354
357
|
d["embedding"] = emb
|
|
355
358
|
if settings.envs:
|
|
356
359
|
d["envs"] = dict(settings.envs)
|
|
@@ -367,6 +370,8 @@ def _user_settings_from_dict(d: dict[str, Any]) -> UserSettings:
|
|
|
367
370
|
emb_kwargs["provider"] = emb_dict["provider"]
|
|
368
371
|
if "device" in emb_dict:
|
|
369
372
|
emb_kwargs["device"] = emb_dict["device"]
|
|
373
|
+
if "min_interval_ms" in emb_dict:
|
|
374
|
+
emb_kwargs["min_interval_ms"] = emb_dict["min_interval_ms"]
|
|
370
375
|
embedding = EmbeddingSettings(**emb_kwargs)
|
|
371
376
|
envs = d.get("envs", {})
|
|
372
377
|
return UserSettings(embedding=embedding, envs=envs)
|
|
@@ -21,6 +21,7 @@ from .settings import EmbeddingSettings
|
|
|
21
21
|
logger = logging.getLogger(__name__)
|
|
22
22
|
|
|
23
23
|
SBERT_PREFIX = "sbert/"
|
|
24
|
+
DEFAULT_LITELLM_MIN_INTERVAL_MS = 5
|
|
24
25
|
|
|
25
26
|
# Models that define a "query" prompt for asymmetric retrieval.
|
|
26
27
|
_QUERY_PROMPT_MODELS = {"nomic-ai/nomic-embed-code", "nomic-ai/CodeRankEmbed"}
|
|
@@ -63,11 +64,23 @@ def create_embedder(settings: EmbeddingSettings) -> Embedder:
|
|
|
63
64
|
)
|
|
64
65
|
logger.info("Embedding model: %s | device: %s", settings.model, settings.device)
|
|
65
66
|
else:
|
|
66
|
-
from
|
|
67
|
+
from .litellm_embedder import PacedLiteLLMEmbedder
|
|
67
68
|
|
|
68
|
-
|
|
69
|
+
min_interval_ms = (
|
|
70
|
+
settings.min_interval_ms
|
|
71
|
+
if settings.min_interval_ms is not None
|
|
72
|
+
else DEFAULT_LITELLM_MIN_INTERVAL_MS
|
|
73
|
+
)
|
|
74
|
+
instance = PacedLiteLLMEmbedder(
|
|
75
|
+
settings.model,
|
|
76
|
+
min_interval_ms=min_interval_ms,
|
|
77
|
+
)
|
|
69
78
|
query_prompt_name = None
|
|
70
|
-
logger.info(
|
|
79
|
+
logger.info(
|
|
80
|
+
"Embedding model (LiteLLM): %s | min_interval_ms: %s",
|
|
81
|
+
settings.model,
|
|
82
|
+
min_interval_ms,
|
|
83
|
+
)
|
|
71
84
|
|
|
72
85
|
embedder = instance
|
|
73
86
|
return instance
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
# file generated by setuptools-scm
|
|
2
|
-
# don't change, don't track in version control
|
|
3
|
-
|
|
4
|
-
__all__ = [
|
|
5
|
-
"__version__",
|
|
6
|
-
"__version_tuple__",
|
|
7
|
-
"version",
|
|
8
|
-
"version_tuple",
|
|
9
|
-
"__commit_id__",
|
|
10
|
-
"commit_id",
|
|
11
|
-
]
|
|
12
|
-
|
|
13
|
-
TYPE_CHECKING = False
|
|
14
|
-
if TYPE_CHECKING:
|
|
15
|
-
from typing import Tuple
|
|
16
|
-
from typing import Union
|
|
17
|
-
|
|
18
|
-
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
-
COMMIT_ID = Union[str, None]
|
|
20
|
-
else:
|
|
21
|
-
VERSION_TUPLE = object
|
|
22
|
-
COMMIT_ID = object
|
|
23
|
-
|
|
24
|
-
version: str
|
|
25
|
-
__version__: str
|
|
26
|
-
__version_tuple__: VERSION_TUPLE
|
|
27
|
-
version_tuple: VERSION_TUPLE
|
|
28
|
-
commit_id: COMMIT_ID
|
|
29
|
-
__commit_id__: COMMIT_ID
|
|
30
|
-
|
|
31
|
-
__version__ = version = '0.2.10'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 2, 10)
|
|
33
|
-
|
|
34
|
-
__commit_id__ = commit_id = None
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|