cocoindex-code 0.2.10__tar.gz → 0.2.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/PKG-INFO +3 -1
  2. {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/README.md +2 -0
  3. cocoindex_code-0.2.11/src/cocoindex_code/_version.py +24 -0
  4. cocoindex_code-0.2.11/src/cocoindex_code/litellm_embedder.py +126 -0
  5. {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/settings.py +5 -0
  6. {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/shared.py +16 -3
  7. cocoindex_code-0.2.10/src/cocoindex_code/_version.py +0 -34
  8. {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/.gitignore +0 -0
  9. {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/LICENSE +0 -0
  10. {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/pyproject.toml +0 -0
  11. {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/__init__.py +0 -0
  12. {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/__main__.py +0 -0
  13. {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/chunking.py +0 -0
  14. {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/cli.py +0 -0
  15. {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/client.py +0 -0
  16. {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/daemon.py +0 -0
  17. {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/indexer.py +0 -0
  18. {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/project.py +0 -0
  19. {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/protocol.py +0 -0
  20. {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/query.py +0 -0
  21. {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/schema.py +0 -0
  22. {cocoindex_code-0.2.10 → cocoindex_code-0.2.11}/src/cocoindex_code/server.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex-code
3
- Version: 0.2.10
3
+ Version: 0.2.11
4
4
  Summary: MCP server for indexing and querying codebases using CocoIndex
5
5
  Project-URL: Homepage, https://github.com/cocoindex-io/cocoindex-code
6
6
  Project-URL: Repository, https://github.com/cocoindex-io/cocoindex-code
@@ -345,6 +345,7 @@ embedding:
345
345
  provider: sentence-transformers # or "litellm"
346
346
  model: sentence-transformers/all-MiniLM-L6-v2
347
347
  device: mps # optional: cpu, cuda, mps (auto-detected if omitted)
348
+ min_interval_ms: 300 # optional: minimum gap in milliseconds between LiteLLM embedding requests, to reduce HTTP 429 (rate limit) errors; defaults to 5 ms for LiteLLM
348
349
 
349
350
  envs: # extra environment variables for the daemon
350
351
  OPENAI_API_KEY: your-key # only needed if not already in your shell environment
@@ -436,6 +437,7 @@ Set `OLLAMA_API_BASE` in `envs:` if your Ollama server is not at `http://localho
436
437
  ```yaml
437
438
  embedding:
438
439
  model: text-embedding-3-small
440
+ min_interval_ms: 300 # optional: override the 5ms LiteLLM default
439
441
  envs:
440
442
  OPENAI_API_KEY: your-api-key
441
443
  ```
@@ -306,6 +306,7 @@ embedding:
306
306
  provider: sentence-transformers # or "litellm"
307
307
  model: sentence-transformers/all-MiniLM-L6-v2
308
308
  device: mps # optional: cpu, cuda, mps (auto-detected if omitted)
309
+ min_interval_ms: 300 # optional: minimum gap in milliseconds between LiteLLM embedding requests, to reduce HTTP 429 (rate limit) errors; defaults to 5 ms for LiteLLM
309
310
 
310
311
  envs: # extra environment variables for the daemon
311
312
  OPENAI_API_KEY: your-key # only needed if not already in your shell environment
@@ -397,6 +398,7 @@ Set `OLLAMA_API_BASE` in `envs:` if your Ollama server is not at `http://localho
397
398
  ```yaml
398
399
  embedding:
399
400
  model: text-embedding-3-small
401
+ min_interval_ms: 300 # optional: override the 5ms LiteLLM default
400
402
  envs:
401
403
  OPENAI_API_KEY: your-api-key
402
404
  ```
@@ -0,0 +1,24 @@
1
+ # file generated by vcs-versioning
2
+ # don't change, don't track in version control
3
+ from __future__ import annotations
4
+
5
+ __all__ = [
6
+ "__version__",
7
+ "__version_tuple__",
8
+ "version",
9
+ "version_tuple",
10
+ "__commit_id__",
11
+ "commit_id",
12
+ ]
13
+
14
+ version: str
15
+ __version__: str
16
+ __version_tuple__: tuple[int | str, ...]
17
+ version_tuple: tuple[int | str, ...]
18
+ commit_id: str | None
19
+ __commit_id__: str | None
20
+
21
+ __version__ = version = '0.2.11'
22
+ __version_tuple__ = version_tuple = (0, 2, 11)
23
+
24
+ __commit_id__ = commit_id = None
@@ -0,0 +1,126 @@
1
+ """LiteLLM embedder with optional request pacing and rate-limit retries."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ import re
8
+ import time
9
+ from typing import Any
10
+
11
+ import cocoindex as coco
12
+ import numpy as np
13
+ from cocoindex.ops.litellm import LiteLLMEmbedder, litellm
14
+ from numpy.typing import NDArray
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ _RATE_LIMIT_DELAY_RE = re.compile(r"Please try again in ([0-9.]+)(ms|s)", re.IGNORECASE)
19
+ _MAX_RATE_LIMIT_RETRIES = 6
20
+
21
+
22
+ def _get_rate_limit_delay(exc: Exception, attempt: int) -> float | None:
23
+ message = str(exc)
24
+ if "rate limit" not in message.lower():
25
+ return None
26
+
27
+ match = _RATE_LIMIT_DELAY_RE.search(message)
28
+ if match is not None:
29
+ value = float(match.group(1))
30
+ unit = match.group(2).lower()
31
+ delay = value / 1000.0 if unit == "ms" else value
32
+ else:
33
+ delay = min(0.5 * (2**attempt), 10.0)
34
+
35
+ return min(delay + 0.1, 10.0)
36
+
37
+
38
class PacedLiteLLMEmbedder(LiteLLMEmbedder):
    """LiteLLM embedder that serializes requests and paces them when configured.

    Every embedding call goes through :meth:`run_embedding_request`, which holds
    one ``asyncio.Lock`` for the whole call (rate-limit retries included), so at
    most one request from this instance is in flight at a time. When a minimum
    interval is configured, the next request is not started until that interval
    has elapsed since the previous request finished.

    NOTE(review): ``self._model``, ``self._kwargs``, ``self._dim`` and
    ``self._get_lock()`` are presumably provided by ``LiteLLMEmbedder``;
    confirm against the cocoindex version in use.
    """

    def __init__(self, model: str, *, min_interval_ms: int | None = None, **kwargs: Any) -> None:
        """Create the embedder.

        Args:
            model: Model name, forwarded to ``LiteLLMEmbedder``.
            min_interval_ms: Minimum gap between requests in milliseconds.
                ``None``, ``0`` and negative values disable pacing.
            **kwargs: Forwarded unchanged to ``LiteLLMEmbedder``.
        """
        super().__init__(model, **kwargs)
        self._min_request_interval_seconds = max(0.0, float(min_interval_ms or 0) / 1000.0)
        # Created on first use rather than here (see _get_request_lock).
        self._request_lock: asyncio.Lock | None = None
        # time.monotonic() value before which no new request may start.
        self._next_request_at: float = 0.0

    def _get_request_lock(self) -> asyncio.Lock:
        """Return the lock that serializes requests, creating it on first use."""
        if self._request_lock is None:
            self._request_lock = asyncio.Lock()
        return self._request_lock

    async def _aembedding_with_rate_limit_retries(
        self, *, model: str, input: list[str], **kwargs: Any
    ) -> Any:
        """Call ``litellm.aembedding`` and retry when the error is a rate limit.

        Up to ``_MAX_RATE_LIMIT_RETRIES`` attempts are made in total. Between
        attempts the coroutine sleeps for the delay computed by
        ``_get_rate_limit_delay``.

        Returns:
            The response object returned by ``litellm.aembedding``.

        Raises:
            Exception: The original error, re-raised unchanged, when it is not a
                rate-limit error or when the final attempt fails.
        """
        attempt = 0
        while True:
            try:
                return await litellm.aembedding(model=model, input=input, **kwargs)
            except Exception as exc:  # noqa: BLE001
                delay = _get_rate_limit_delay(exc, attempt)
                if delay is None or attempt >= _MAX_RATE_LIMIT_RETRIES - 1:
                    raise

            logger.warning(
                "Embedding rate limited for model %s, retrying in %.3fs (attempt %d/%d)",
                model,
                delay,
                attempt + 1,
                _MAX_RATE_LIMIT_RETRIES,
            )
            await asyncio.sleep(delay)
            attempt += 1

    async def run_embedding_request(self, *, input: list[str], **kwargs: Any) -> Any:
        """Run one embedding request under the request lock, honoring the pacing interval.

        Args:
            input: Texts to embed.
            **kwargs: Extra keyword arguments forwarded to ``litellm.aembedding``.

        Returns:
            The response object returned by ``litellm.aembedding``.
        """
        async with self._get_request_lock():
            wait_seconds = self._next_request_at - time.monotonic()
            if wait_seconds > 0:
                await asyncio.sleep(wait_seconds)

            try:
                return await self._aembedding_with_rate_limit_retries(
                    model=self._model,
                    input=input,
                    **kwargs,
                )
            finally:
                # Advance the deadline on failure as well, so a request that
                # follows a failed one is still paced instead of firing at once.
                self._next_request_at = time.monotonic() + self._min_request_interval_seconds

    async def _get_dim(self) -> int:
        """Return the embedding dimension, probing with a one-item request if unknown.

        The cached value is re-checked after acquiring ``self._get_lock()`` so
        concurrent callers do not each issue a probe request.
        """
        if self._dim is not None:
            return self._dim
        async with self._get_lock():
            if self._dim is not None:
                return self._dim
            response = await self.run_embedding_request(input=["hello"], **self._kwargs)
            embedding = response.data[0]["embedding"]
            self._dim = len(embedding)
            return self._dim

    @coco.fn.as_async(
        batching=True,
        max_batch_size=64,
        memo=True,
        version=1,
        logic_tracking="self",
    )
    async def embed(
        self,
        texts: list[str],
        input_type: str | None = None,
    ) -> list[NDArray[np.float32]]:
        """Embed *texts* and return one float32 vector per item in the response.

        Args:
            texts: Texts to embed in a single request.
            input_type: When given, sent to the provider as the ``input_type``
                request argument.
        """
        kwargs = dict(self._kwargs)
        if input_type is not None:
            kwargs["input_type"] = input_type
        response = await self.run_embedding_request(input=texts, **kwargs)
        return [np.array(item["embedding"], dtype=np.float32) for item in response.data]
@@ -92,6 +92,7 @@ class EmbeddingSettings:
92
92
  model: str
93
93
  provider: str = "litellm"
94
94
  device: str | None = None
95
+ min_interval_ms: int | None = None
95
96
 
96
97
 
97
98
  @dataclass
@@ -351,6 +352,8 @@ def _user_settings_to_dict(settings: UserSettings) -> dict[str, Any]:
351
352
  }
352
353
  if settings.embedding.device is not None:
353
354
  emb["device"] = settings.embedding.device
355
+ if settings.embedding.min_interval_ms is not None:
356
+ emb["min_interval_ms"] = settings.embedding.min_interval_ms
354
357
  d["embedding"] = emb
355
358
  if settings.envs:
356
359
  d["envs"] = dict(settings.envs)
@@ -367,6 +370,8 @@ def _user_settings_from_dict(d: dict[str, Any]) -> UserSettings:
367
370
  emb_kwargs["provider"] = emb_dict["provider"]
368
371
  if "device" in emb_dict:
369
372
  emb_kwargs["device"] = emb_dict["device"]
373
+ if "min_interval_ms" in emb_dict:
374
+ emb_kwargs["min_interval_ms"] = emb_dict["min_interval_ms"]
370
375
  embedding = EmbeddingSettings(**emb_kwargs)
371
376
  envs = d.get("envs", {})
372
377
  return UserSettings(embedding=embedding, envs=envs)
@@ -21,6 +21,7 @@ from .settings import EmbeddingSettings
21
21
  logger = logging.getLogger(__name__)
22
22
 
23
23
  SBERT_PREFIX = "sbert/"
24
+ DEFAULT_LITELLM_MIN_INTERVAL_MS = 5
24
25
 
25
26
  # Models that define a "query" prompt for asymmetric retrieval.
26
27
  _QUERY_PROMPT_MODELS = {"nomic-ai/nomic-embed-code", "nomic-ai/CodeRankEmbed"}
@@ -63,11 +64,23 @@ def create_embedder(settings: EmbeddingSettings) -> Embedder:
63
64
  )
64
65
  logger.info("Embedding model: %s | device: %s", settings.model, settings.device)
65
66
  else:
66
- from cocoindex.ops.litellm import LiteLLMEmbedder
67
+ from .litellm_embedder import PacedLiteLLMEmbedder
67
68
 
68
- instance = LiteLLMEmbedder(settings.model)
69
+ min_interval_ms = (
70
+ settings.min_interval_ms
71
+ if settings.min_interval_ms is not None
72
+ else DEFAULT_LITELLM_MIN_INTERVAL_MS
73
+ )
74
+ instance = PacedLiteLLMEmbedder(
75
+ settings.model,
76
+ min_interval_ms=min_interval_ms,
77
+ )
69
78
  query_prompt_name = None
70
- logger.info("Embedding model (LiteLLM): %s", settings.model)
79
+ logger.info(
80
+ "Embedding model (LiteLLM): %s | min_interval_ms: %s",
81
+ settings.model,
82
+ min_interval_ms,
83
+ )
71
84
 
72
85
  embedder = instance
73
86
  return instance
@@ -1,34 +0,0 @@
1
- # file generated by setuptools-scm
2
- # don't change, don't track in version control
3
-
4
- __all__ = [
5
- "__version__",
6
- "__version_tuple__",
7
- "version",
8
- "version_tuple",
9
- "__commit_id__",
10
- "commit_id",
11
- ]
12
-
13
- TYPE_CHECKING = False
14
- if TYPE_CHECKING:
15
- from typing import Tuple
16
- from typing import Union
17
-
18
- VERSION_TUPLE = Tuple[Union[int, str], ...]
19
- COMMIT_ID = Union[str, None]
20
- else:
21
- VERSION_TUPLE = object
22
- COMMIT_ID = object
23
-
24
- version: str
25
- __version__: str
26
- __version_tuple__: VERSION_TUPLE
27
- version_tuple: VERSION_TUPLE
28
- commit_id: COMMIT_ID
29
- __commit_id__: COMMIT_ID
30
-
31
- __version__ = version = '0.2.10'
32
- __version_tuple__ = version_tuple = (0, 2, 10)
33
-
34
- __commit_id__ = commit_id = None
File without changes