cocoindex-code 0.2.9__tar.gz → 0.2.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cocoindex_code-0.2.9 → cocoindex_code-0.2.11}/PKG-INFO +103 -2
- {cocoindex_code-0.2.9 → cocoindex_code-0.2.11}/README.md +101 -0
- {cocoindex_code-0.2.9 → cocoindex_code-0.2.11}/pyproject.toml +1 -1
- cocoindex_code-0.2.11/src/cocoindex_code/_version.py +24 -0
- cocoindex_code-0.2.11/src/cocoindex_code/litellm_embedder.py +126 -0
- {cocoindex_code-0.2.9 → cocoindex_code-0.2.11}/src/cocoindex_code/settings.py +5 -0
- {cocoindex_code-0.2.9 → cocoindex_code-0.2.11}/src/cocoindex_code/shared.py +16 -3
- cocoindex_code-0.2.9/src/cocoindex_code/_version.py +0 -34
- {cocoindex_code-0.2.9 → cocoindex_code-0.2.11}/.gitignore +0 -0
- {cocoindex_code-0.2.9 → cocoindex_code-0.2.11}/LICENSE +0 -0
- {cocoindex_code-0.2.9 → cocoindex_code-0.2.11}/src/cocoindex_code/__init__.py +0 -0
- {cocoindex_code-0.2.9 → cocoindex_code-0.2.11}/src/cocoindex_code/__main__.py +0 -0
- {cocoindex_code-0.2.9 → cocoindex_code-0.2.11}/src/cocoindex_code/chunking.py +0 -0
- {cocoindex_code-0.2.9 → cocoindex_code-0.2.11}/src/cocoindex_code/cli.py +0 -0
- {cocoindex_code-0.2.9 → cocoindex_code-0.2.11}/src/cocoindex_code/client.py +0 -0
- {cocoindex_code-0.2.9 → cocoindex_code-0.2.11}/src/cocoindex_code/daemon.py +0 -0
- {cocoindex_code-0.2.9 → cocoindex_code-0.2.11}/src/cocoindex_code/indexer.py +0 -0
- {cocoindex_code-0.2.9 → cocoindex_code-0.2.11}/src/cocoindex_code/project.py +0 -0
- {cocoindex_code-0.2.9 → cocoindex_code-0.2.11}/src/cocoindex_code/protocol.py +0 -0
- {cocoindex_code-0.2.9 → cocoindex_code-0.2.11}/src/cocoindex_code/query.py +0 -0
- {cocoindex_code-0.2.9 → cocoindex_code-0.2.11}/src/cocoindex_code/schema.py +0 -0
- {cocoindex_code-0.2.9 → cocoindex_code-0.2.11}/src/cocoindex_code/server.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cocoindex-code
|
|
3
|
-
Version: 0.2.9
|
|
3
|
+
Version: 0.2.11
|
|
4
4
|
Summary: MCP server for indexing and querying codebases using CocoIndex
|
|
5
5
|
Project-URL: Homepage, https://github.com/cocoindex-io/cocoindex-code
|
|
6
6
|
Project-URL: Repository, https://github.com/cocoindex-io/cocoindex-code
|
|
@@ -17,7 +17,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.13
|
|
18
18
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
19
|
Requires-Python: >=3.11
|
|
20
|
-
Requires-Dist: cocoindex[litellm]==1.0.
|
|
20
|
+
Requires-Dist: cocoindex[litellm]==1.0.0a38
|
|
21
21
|
Requires-Dist: einops>=0.8.2
|
|
22
22
|
Requires-Dist: mcp>=1.0.0
|
|
23
23
|
Requires-Dist: msgspec>=0.19.0
|
|
@@ -226,6 +226,105 @@ ccc search --refresh database schema # update index first, then
|
|
|
226
226
|
|
|
227
227
|
By default, `ccc search` scopes results to your current working directory (relative to the project root). Use `--path` to override.
|
|
228
228
|
|
|
229
|
+
## Docker
|
|
230
|
+
|
|
231
|
+
A Docker image is available for teams who want a reproducible, dependency-free
|
|
232
|
+
setup — no Python, `uv`, or system dependencies required on the host.
|
|
233
|
+
|
|
234
|
+
The recommended approach is a **persistent container**: start it once, and use
|
|
235
|
+
`docker exec` to run CLI commands or connect MCP sessions to it. The daemon
|
|
236
|
+
inside stays warm across sessions, so the embedding model is loaded only once.
|
|
237
|
+
|
|
238
|
+
### Step 1 — Start the container
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
docker run -d --name cocoindex-code \
|
|
242
|
+
--volume "$(pwd):/workspace" \
|
|
243
|
+
--volume cocoindex-db:/db \
|
|
244
|
+
--volume cocoindex-model-cache:/root/.cache \
|
|
245
|
+
ghcr.io/cocoindex-io/cocoindex-code:latest
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
- `/workspace` — mount your project root here
|
|
249
|
+
- `cocoindex-db` — index databases live inside the container (fast native I/O, no cross-OS volume issues)
|
|
250
|
+
- `cocoindex-model-cache` — persists the embedding model across image upgrades
|
|
251
|
+
|
|
252
|
+
### Step 2 — Index your codebase
|
|
253
|
+
|
|
254
|
+
```bash
|
|
255
|
+
docker exec -it cocoindex-code ccc index
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
### Step 3 — Connect your coding agent
|
|
259
|
+
|
|
260
|
+
<details>
|
|
261
|
+
<summary>Claude Code</summary>
|
|
262
|
+
|
|
263
|
+
```bash
|
|
264
|
+
claude mcp add cocoindex-code -- docker exec -i cocoindex-code ccc mcp
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
Or via `.mcp.json`:
|
|
268
|
+
|
|
269
|
+
```json
|
|
270
|
+
{
|
|
271
|
+
"mcpServers": {
|
|
272
|
+
"cocoindex-code": {
|
|
273
|
+
"type": "stdio",
|
|
274
|
+
"command": "docker",
|
|
275
|
+
"args": ["exec", "-i", "cocoindex-code", "ccc", "mcp"]
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
```
|
|
280
|
+
</details>
|
|
281
|
+
|
|
282
|
+
<details>
|
|
283
|
+
<summary>Codex</summary>
|
|
284
|
+
|
|
285
|
+
```bash
|
|
286
|
+
codex mcp add cocoindex-code -- docker exec -i cocoindex-code ccc mcp
|
|
287
|
+
```
|
|
288
|
+
</details>
|
|
289
|
+
|
|
290
|
+
### CLI usage inside the container
|
|
291
|
+
|
|
292
|
+
All `ccc` commands work via `docker exec`:
|
|
293
|
+
|
|
294
|
+
```bash
|
|
295
|
+
docker exec -it cocoindex-code ccc index
|
|
296
|
+
docker exec -it cocoindex-code ccc search "authentication logic"
|
|
297
|
+
docker exec -it cocoindex-code ccc status
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
Or set an alias on your host so it feels native:
|
|
301
|
+
|
|
302
|
+
```bash
|
|
303
|
+
alias ccc='docker exec -it cocoindex-code ccc'
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
### Configuration via environment variables
|
|
307
|
+
|
|
308
|
+
Pass configuration to `docker run` with `-e`:
|
|
309
|
+
|
|
310
|
+
```bash
|
|
311
|
+
# Extra extensions (e.g. Typesafe Config, SBT build files)
|
|
312
|
+
-e COCOINDEX_CODE_EXTRA_EXTENSIONS="conf,sbt"
|
|
313
|
+
|
|
314
|
+
# Exclude build artefacts (Scala/SBT example)
|
|
315
|
+
-e COCOINDEX_CODE_EXCLUDE_PATTERNS='["**/target/**","**/.bloop/**","**/.metals/**"]'
|
|
316
|
+
|
|
317
|
+
# Swap in a code-optimised embedding model
|
|
318
|
+
-e COCOINDEX_CODE_EMBEDDING_MODEL=voyage/voyage-code-3
|
|
319
|
+
-e VOYAGE_API_KEY=your-key
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
### Build the image locally
|
|
323
|
+
|
|
324
|
+
```bash
|
|
325
|
+
docker build -t cocoindex-code:local -f docker/Dockerfile .
|
|
326
|
+
```
|
|
327
|
+
|
|
229
328
|
## Features
|
|
230
329
|
- **Semantic Code Search**: Find relevant code using natural language queries when grep doesn't work well, and save tokens immediately.
|
|
231
330
|
- **Ultra Performant**: ⚡ Built on top of ultra performant [Rust indexing engine](https://github.com/cocoindex-io/cocoindex). Only re-indexes changed files for fast updates.
|
|
@@ -246,6 +345,7 @@ embedding:
|
|
|
246
345
|
provider: sentence-transformers # or "litellm"
|
|
247
346
|
model: sentence-transformers/all-MiniLM-L6-v2
|
|
248
347
|
device: mps # optional: cpu, cuda, mps (auto-detected if omitted)
|
|
348
|
+
min_interval_ms: 300 # optional: pace LiteLLM embedding requests to reduce 429s; defaults to 5 for LiteLLM
|
|
249
349
|
|
|
250
350
|
envs: # extra environment variables for the daemon
|
|
251
351
|
OPENAI_API_KEY: your-key # only needed if not already in your shell environment
|
|
@@ -337,6 +437,7 @@ Set `OLLAMA_API_BASE` in `envs:` if your Ollama server is not at `http://localho
|
|
|
337
437
|
```yaml
|
|
338
438
|
embedding:
|
|
339
439
|
model: text-embedding-3-small
|
|
440
|
+
min_interval_ms: 300 # optional: override the 5ms LiteLLM default
|
|
340
441
|
envs:
|
|
341
442
|
OPENAI_API_KEY: your-api-key
|
|
342
443
|
```
|
|
@@ -187,6 +187,105 @@ ccc search --refresh database schema # update index first, then
|
|
|
187
187
|
|
|
188
188
|
By default, `ccc search` scopes results to your current working directory (relative to the project root). Use `--path` to override.
|
|
189
189
|
|
|
190
|
+
## Docker
|
|
191
|
+
|
|
192
|
+
A Docker image is available for teams who want a reproducible, dependency-free
|
|
193
|
+
setup — no Python, `uv`, or system dependencies required on the host.
|
|
194
|
+
|
|
195
|
+
The recommended approach is a **persistent container**: start it once, and use
|
|
196
|
+
`docker exec` to run CLI commands or connect MCP sessions to it. The daemon
|
|
197
|
+
inside stays warm across sessions, so the embedding model is loaded only once.
|
|
198
|
+
|
|
199
|
+
### Step 1 — Start the container
|
|
200
|
+
|
|
201
|
+
```bash
|
|
202
|
+
docker run -d --name cocoindex-code \
|
|
203
|
+
--volume "$(pwd):/workspace" \
|
|
204
|
+
--volume cocoindex-db:/db \
|
|
205
|
+
--volume cocoindex-model-cache:/root/.cache \
|
|
206
|
+
ghcr.io/cocoindex-io/cocoindex-code:latest
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
- `/workspace` — mount your project root here
|
|
210
|
+
- `cocoindex-db` — index databases live inside the container (fast native I/O, no cross-OS volume issues)
|
|
211
|
+
- `cocoindex-model-cache` — persists the embedding model across image upgrades
|
|
212
|
+
|
|
213
|
+
### Step 2 — Index your codebase
|
|
214
|
+
|
|
215
|
+
```bash
|
|
216
|
+
docker exec -it cocoindex-code ccc index
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
### Step 3 — Connect your coding agent
|
|
220
|
+
|
|
221
|
+
<details>
|
|
222
|
+
<summary>Claude Code</summary>
|
|
223
|
+
|
|
224
|
+
```bash
|
|
225
|
+
claude mcp add cocoindex-code -- docker exec -i cocoindex-code ccc mcp
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
Or via `.mcp.json`:
|
|
229
|
+
|
|
230
|
+
```json
|
|
231
|
+
{
|
|
232
|
+
"mcpServers": {
|
|
233
|
+
"cocoindex-code": {
|
|
234
|
+
"type": "stdio",
|
|
235
|
+
"command": "docker",
|
|
236
|
+
"args": ["exec", "-i", "cocoindex-code", "ccc", "mcp"]
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
```
|
|
241
|
+
</details>
|
|
242
|
+
|
|
243
|
+
<details>
|
|
244
|
+
<summary>Codex</summary>
|
|
245
|
+
|
|
246
|
+
```bash
|
|
247
|
+
codex mcp add cocoindex-code -- docker exec -i cocoindex-code ccc mcp
|
|
248
|
+
```
|
|
249
|
+
</details>
|
|
250
|
+
|
|
251
|
+
### CLI usage inside the container
|
|
252
|
+
|
|
253
|
+
All `ccc` commands work via `docker exec`:
|
|
254
|
+
|
|
255
|
+
```bash
|
|
256
|
+
docker exec -it cocoindex-code ccc index
|
|
257
|
+
docker exec -it cocoindex-code ccc search "authentication logic"
|
|
258
|
+
docker exec -it cocoindex-code ccc status
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
Or set an alias on your host so it feels native:
|
|
262
|
+
|
|
263
|
+
```bash
|
|
264
|
+
alias ccc='docker exec -it cocoindex-code ccc'
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
### Configuration via environment variables
|
|
268
|
+
|
|
269
|
+
Pass configuration to `docker run` with `-e`:
|
|
270
|
+
|
|
271
|
+
```bash
|
|
272
|
+
# Extra extensions (e.g. Typesafe Config, SBT build files)
|
|
273
|
+
-e COCOINDEX_CODE_EXTRA_EXTENSIONS="conf,sbt"
|
|
274
|
+
|
|
275
|
+
# Exclude build artefacts (Scala/SBT example)
|
|
276
|
+
-e COCOINDEX_CODE_EXCLUDE_PATTERNS='["**/target/**","**/.bloop/**","**/.metals/**"]'
|
|
277
|
+
|
|
278
|
+
# Swap in a code-optimised embedding model
|
|
279
|
+
-e COCOINDEX_CODE_EMBEDDING_MODEL=voyage/voyage-code-3
|
|
280
|
+
-e VOYAGE_API_KEY=your-key
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
### Build the image locally
|
|
284
|
+
|
|
285
|
+
```bash
|
|
286
|
+
docker build -t cocoindex-code:local -f docker/Dockerfile .
|
|
287
|
+
```
|
|
288
|
+
|
|
190
289
|
## Features
|
|
191
290
|
- **Semantic Code Search**: Find relevant code using natural language queries when grep doesn't work well, and save tokens immediately.
|
|
192
291
|
- **Ultra Performant**: ⚡ Built on top of ultra performant [Rust indexing engine](https://github.com/cocoindex-io/cocoindex). Only re-indexes changed files for fast updates.
|
|
@@ -207,6 +306,7 @@ embedding:
|
|
|
207
306
|
provider: sentence-transformers # or "litellm"
|
|
208
307
|
model: sentence-transformers/all-MiniLM-L6-v2
|
|
209
308
|
device: mps # optional: cpu, cuda, mps (auto-detected if omitted)
|
|
309
|
+
min_interval_ms: 300 # optional: pace LiteLLM embedding requests to reduce 429s; defaults to 5 for LiteLLM
|
|
210
310
|
|
|
211
311
|
envs: # extra environment variables for the daemon
|
|
212
312
|
OPENAI_API_KEY: your-key # only needed if not already in your shell environment
|
|
@@ -298,6 +398,7 @@ Set `OLLAMA_API_BASE` in `envs:` if your Ollama server is not at `http://localho
|
|
|
298
398
|
```yaml
|
|
299
399
|
embedding:
|
|
300
400
|
model: text-embedding-3-small
|
|
401
|
+
min_interval_ms: 300 # optional: override the 5ms LiteLLM default
|
|
301
402
|
envs:
|
|
302
403
|
OPENAI_API_KEY: your-api-key
|
|
303
404
|
```
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# file generated by vcs-versioning
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
__all__ = [
|
|
6
|
+
"__version__",
|
|
7
|
+
"__version_tuple__",
|
|
8
|
+
"version",
|
|
9
|
+
"version_tuple",
|
|
10
|
+
"__commit_id__",
|
|
11
|
+
"commit_id",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
version: str
|
|
15
|
+
__version__: str
|
|
16
|
+
__version_tuple__: tuple[int | str, ...]
|
|
17
|
+
version_tuple: tuple[int | str, ...]
|
|
18
|
+
commit_id: str | None
|
|
19
|
+
__commit_id__: str | None
|
|
20
|
+
|
|
21
|
+
__version__ = version = '0.2.11'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 2, 11)
|
|
23
|
+
|
|
24
|
+
__commit_id__ = commit_id = None
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""LiteLLM embedder with optional request pacing and rate-limit retries."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
import re
|
|
8
|
+
import time
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
import cocoindex as coco
|
|
12
|
+
import numpy as np
|
|
13
|
+
from cocoindex.ops.litellm import LiteLLMEmbedder, litellm
|
|
14
|
+
from numpy.typing import NDArray
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
_RATE_LIMIT_DELAY_RE = re.compile(r"Please try again in ([0-9.]+)(ms|s)", re.IGNORECASE)
|
|
19
|
+
_MAX_RATE_LIMIT_RETRIES = 6
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _get_rate_limit_delay(exc: Exception, attempt: int) -> float | None:
|
|
23
|
+
message = str(exc)
|
|
24
|
+
if "rate limit" not in message.lower():
|
|
25
|
+
return None
|
|
26
|
+
|
|
27
|
+
match = _RATE_LIMIT_DELAY_RE.search(message)
|
|
28
|
+
if match is not None:
|
|
29
|
+
value = float(match.group(1))
|
|
30
|
+
unit = match.group(2).lower()
|
|
31
|
+
delay = value / 1000.0 if unit == "ms" else value
|
|
32
|
+
else:
|
|
33
|
+
delay = min(0.5 * (2**attempt), 10.0)
|
|
34
|
+
|
|
35
|
+
return min(delay + 0.1, 10.0)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class PacedLiteLLMEmbedder(LiteLLMEmbedder):
    """LiteLLM embedder that serializes requests and paces them when configured.

    All embedding calls go through :meth:`run_embedding_request`, which holds a
    single lock for the duration of each request and enforces a minimum gap
    between the end of one request and the start of the next.
    """

    def __init__(self, model: str, *, min_interval_ms: int | None = None, **kwargs: Any) -> None:
        """Create the embedder.

        Args:
            model: Model name forwarded to ``LiteLLMEmbedder``.
            min_interval_ms: Minimum gap between requests in milliseconds.
                ``None``, zero, or a negative value disables pacing.
            **kwargs: Forwarded unchanged to ``LiteLLMEmbedder``.
        """
        super().__init__(model, **kwargs)
        self._min_request_interval_seconds = max(0.0, float(min_interval_ms or 0) / 1000.0)
        # Created lazily on first use rather than at construction time.
        self._request_lock: asyncio.Lock | None = None
        # time.monotonic() value before which no new request may start.
        self._next_request_at: float = 0.0

    def _get_request_lock(self) -> asyncio.Lock:
        """Return the lock that serializes requests, creating it on first use."""
        if self._request_lock is None:
            self._request_lock = asyncio.Lock()
        return self._request_lock

    async def _aembedding_with_rate_limit_retries(
        self, *, model: str, input: list[str], **kwargs: Any
    ) -> Any:
        """Call ``litellm.aembedding``, retrying when the provider rate-limits.

        Makes up to ``_MAX_RATE_LIMIT_RETRIES`` attempts. Errors that
        ``_get_rate_limit_delay`` does not classify as rate limiting, and the
        error from the final attempt, are re-raised unchanged.
        """
        for attempt in range(_MAX_RATE_LIMIT_RETRIES):
            try:
                return await litellm.aembedding(model=model, input=input, **kwargs)
            except Exception as exc:  # noqa: BLE001
                delay = _get_rate_limit_delay(exc, attempt)
                if delay is None or attempt == _MAX_RATE_LIMIT_RETRIES - 1:
                    raise

                logger.warning(
                    "Embedding rate limited for model %s, retrying in %.3fs (attempt %d/%d)",
                    model,
                    delay,
                    attempt + 1,
                    _MAX_RATE_LIMIT_RETRIES,
                )
                await asyncio.sleep(delay)

        # The loop always returns or raises while at least one attempt is
        # configured; fail loudly instead of relying on an assert, which is
        # stripped under -O.
        raise RuntimeError("_MAX_RATE_LIMIT_RETRIES must be at least 1")

    async def run_embedding_request(self, *, input: list[str], **kwargs: Any) -> Any:
        """Run one embedding request under the request lock, honouring pacing.

        Waits until the configured minimum interval since the previous request
        has elapsed, performs the request (with rate-limit retries), and
        returns the LiteLLM response.
        """
        async with self._get_request_lock():
            wait = self._next_request_at - time.monotonic()
            if wait > 0:
                await asyncio.sleep(wait)

            try:
                # self._model is not set in this class; presumably provided by
                # LiteLLMEmbedder.__init__ — confirm against the base class.
                return await self._aembedding_with_rate_limit_retries(
                    model=self._model,
                    input=input,
                    **kwargs,
                )
            finally:
                # Update the pacing deadline on failure as well as success, so
                # the request queued behind a failed one is still spaced out.
                self._next_request_at = time.monotonic() + self._min_request_interval_seconds

    async def _get_dim(self) -> int:
        """Return the embedding dimension, probing the model once if unknown.

        The probe embeds the single string ``"hello"`` through the paced
        request path and stores the vector length in ``self._dim``.
        """
        if self._dim is not None:
            return self._dim
        # self._get_lock() and self._kwargs are not defined in this class;
        # presumably inherited from LiteLLMEmbedder — confirm.
        async with self._get_lock():
            # Re-check after acquiring the lock: another task may have filled it.
            if self._dim is not None:
                return self._dim
            response = await self.run_embedding_request(input=["hello"], **self._kwargs)
            embedding = response.data[0]["embedding"]
            self._dim = len(embedding)
            return self._dim

    @coco.fn.as_async(
        batching=True,
        max_batch_size=64,
        memo=True,
        version=1,
        logic_tracking="self",
    )
    async def embed(
        self,
        texts: list[str],
        input_type: str | None = None,
    ) -> list[NDArray[np.float32]]:
        """Embed *texts* and return one float32 vector per response item.

        Args:
            texts: Strings to embed in a single request.
            input_type: Optional value passed to LiteLLM as ``input_type``;
                omitted from the request when ``None``.
        """
        # Copy so the per-call input_type never leaks into the shared kwargs.
        kwargs = dict(self._kwargs)
        if input_type is not None:
            kwargs["input_type"] = input_type
        response = await self.run_embedding_request(input=texts, **kwargs)
        return [np.array(item["embedding"], dtype=np.float32) for item in response.data]
|
|
@@ -92,6 +92,7 @@ class EmbeddingSettings:
|
|
|
92
92
|
model: str
|
|
93
93
|
provider: str = "litellm"
|
|
94
94
|
device: str | None = None
|
|
95
|
+
min_interval_ms: int | None = None
|
|
95
96
|
|
|
96
97
|
|
|
97
98
|
@dataclass
|
|
@@ -351,6 +352,8 @@ def _user_settings_to_dict(settings: UserSettings) -> dict[str, Any]:
|
|
|
351
352
|
}
|
|
352
353
|
if settings.embedding.device is not None:
|
|
353
354
|
emb["device"] = settings.embedding.device
|
|
355
|
+
if settings.embedding.min_interval_ms is not None:
|
|
356
|
+
emb["min_interval_ms"] = settings.embedding.min_interval_ms
|
|
354
357
|
d["embedding"] = emb
|
|
355
358
|
if settings.envs:
|
|
356
359
|
d["envs"] = dict(settings.envs)
|
|
@@ -367,6 +370,8 @@ def _user_settings_from_dict(d: dict[str, Any]) -> UserSettings:
|
|
|
367
370
|
emb_kwargs["provider"] = emb_dict["provider"]
|
|
368
371
|
if "device" in emb_dict:
|
|
369
372
|
emb_kwargs["device"] = emb_dict["device"]
|
|
373
|
+
if "min_interval_ms" in emb_dict:
|
|
374
|
+
emb_kwargs["min_interval_ms"] = emb_dict["min_interval_ms"]
|
|
370
375
|
embedding = EmbeddingSettings(**emb_kwargs)
|
|
371
376
|
envs = d.get("envs", {})
|
|
372
377
|
return UserSettings(embedding=embedding, envs=envs)
|
|
@@ -21,6 +21,7 @@ from .settings import EmbeddingSettings
|
|
|
21
21
|
logger = logging.getLogger(__name__)
|
|
22
22
|
|
|
23
23
|
SBERT_PREFIX = "sbert/"
|
|
24
|
+
DEFAULT_LITELLM_MIN_INTERVAL_MS = 5
|
|
24
25
|
|
|
25
26
|
# Models that define a "query" prompt for asymmetric retrieval.
|
|
26
27
|
_QUERY_PROMPT_MODELS = {"nomic-ai/nomic-embed-code", "nomic-ai/CodeRankEmbed"}
|
|
@@ -63,11 +64,23 @@ def create_embedder(settings: EmbeddingSettings) -> Embedder:
|
|
|
63
64
|
)
|
|
64
65
|
logger.info("Embedding model: %s | device: %s", settings.model, settings.device)
|
|
65
66
|
else:
|
|
66
|
-
from
|
|
67
|
+
from .litellm_embedder import PacedLiteLLMEmbedder
|
|
67
68
|
|
|
68
|
-
|
|
69
|
+
min_interval_ms = (
|
|
70
|
+
settings.min_interval_ms
|
|
71
|
+
if settings.min_interval_ms is not None
|
|
72
|
+
else DEFAULT_LITELLM_MIN_INTERVAL_MS
|
|
73
|
+
)
|
|
74
|
+
instance = PacedLiteLLMEmbedder(
|
|
75
|
+
settings.model,
|
|
76
|
+
min_interval_ms=min_interval_ms,
|
|
77
|
+
)
|
|
69
78
|
query_prompt_name = None
|
|
70
|
-
logger.info(
|
|
79
|
+
logger.info(
|
|
80
|
+
"Embedding model (LiteLLM): %s | min_interval_ms: %s",
|
|
81
|
+
settings.model,
|
|
82
|
+
min_interval_ms,
|
|
83
|
+
)
|
|
71
84
|
|
|
72
85
|
embedder = instance
|
|
73
86
|
return instance
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
# file generated by setuptools-scm
|
|
2
|
-
# don't change, don't track in version control
|
|
3
|
-
|
|
4
|
-
__all__ = [
|
|
5
|
-
"__version__",
|
|
6
|
-
"__version_tuple__",
|
|
7
|
-
"version",
|
|
8
|
-
"version_tuple",
|
|
9
|
-
"__commit_id__",
|
|
10
|
-
"commit_id",
|
|
11
|
-
]
|
|
12
|
-
|
|
13
|
-
TYPE_CHECKING = False
|
|
14
|
-
if TYPE_CHECKING:
|
|
15
|
-
from typing import Tuple
|
|
16
|
-
from typing import Union
|
|
17
|
-
|
|
18
|
-
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
-
COMMIT_ID = Union[str, None]
|
|
20
|
-
else:
|
|
21
|
-
VERSION_TUPLE = object
|
|
22
|
-
COMMIT_ID = object
|
|
23
|
-
|
|
24
|
-
version: str
|
|
25
|
-
__version__: str
|
|
26
|
-
__version_tuple__: VERSION_TUPLE
|
|
27
|
-
version_tuple: VERSION_TUPLE
|
|
28
|
-
commit_id: COMMIT_ID
|
|
29
|
-
__commit_id__: COMMIT_ID
|
|
30
|
-
|
|
31
|
-
__version__ = version = '0.2.9'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 2, 9)
|
|
33
|
-
|
|
34
|
-
__commit_id__ = commit_id = None
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|