cocoindex-code 0.2.10__tar.gz → 0.2.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.22}/PKG-INFO +4 -2
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.22}/README.md +2 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.22}/pyproject.toml +1 -1
- cocoindex_code-0.2.22/src/cocoindex_code/_daemon_paths.py +44 -0
- cocoindex_code-0.2.22/src/cocoindex_code/_version.py +24 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.22}/src/cocoindex_code/cli.py +2 -2
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.22}/src/cocoindex_code/client.py +10 -6
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.22}/src/cocoindex_code/daemon.py +8 -39
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.22}/src/cocoindex_code/indexer.py +3 -2
- cocoindex_code-0.2.22/src/cocoindex_code/litellm_embedder.py +126 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.22}/src/cocoindex_code/settings.py +5 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.22}/src/cocoindex_code/shared.py +16 -3
- cocoindex_code-0.2.10/src/cocoindex_code/_version.py +0 -34
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.22}/.gitignore +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.22}/LICENSE +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.22}/src/cocoindex_code/__init__.py +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.22}/src/cocoindex_code/__main__.py +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.22}/src/cocoindex_code/chunking.py +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.22}/src/cocoindex_code/project.py +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.22}/src/cocoindex_code/protocol.py +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.22}/src/cocoindex_code/query.py +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.22}/src/cocoindex_code/schema.py +0 -0
- {cocoindex_code-0.2.10 → cocoindex_code-0.2.22}/src/cocoindex_code/server.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cocoindex-code
|
|
3
|
-
Version: 0.2.10
|
|
3
|
+
Version: 0.2.22
|
|
4
4
|
Summary: MCP server for indexing and querying codebases using CocoIndex
|
|
5
5
|
Project-URL: Homepage, https://github.com/cocoindex-io/cocoindex-code
|
|
6
6
|
Project-URL: Repository, https://github.com/cocoindex-io/cocoindex-code
|
|
@@ -17,7 +17,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.13
|
|
18
18
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
19
|
Requires-Python: >=3.11
|
|
20
|
-
Requires-Dist: cocoindex[litellm]==1.0.
|
|
20
|
+
Requires-Dist: cocoindex[litellm]==1.0.0a43
|
|
21
21
|
Requires-Dist: einops>=0.8.2
|
|
22
22
|
Requires-Dist: mcp>=1.0.0
|
|
23
23
|
Requires-Dist: msgspec>=0.19.0
|
|
@@ -345,6 +345,7 @@ embedding:
|
|
|
345
345
|
provider: sentence-transformers # or "litellm"
|
|
346
346
|
model: sentence-transformers/all-MiniLM-L6-v2
|
|
347
347
|
device: mps # optional: cpu, cuda, mps (auto-detected if omitted)
|
|
348
|
+
min_interval_ms: 300 # optional: pace LiteLLM embedding requests to reduce 429s; defaults to 5 for LiteLLM
|
|
348
349
|
|
|
349
350
|
envs: # extra environment variables for the daemon
|
|
350
351
|
OPENAI_API_KEY: your-key # only needed if not already in your shell environment
|
|
@@ -436,6 +437,7 @@ Set `OLLAMA_API_BASE` in `envs:` if your Ollama server is not at `http://localho
|
|
|
436
437
|
```yaml
|
|
437
438
|
embedding:
|
|
438
439
|
model: text-embedding-3-small
|
|
440
|
+
min_interval_ms: 300 # optional: override the 5ms LiteLLM default
|
|
439
441
|
envs:
|
|
440
442
|
OPENAI_API_KEY: your-api-key
|
|
441
443
|
```
|
|
@@ -306,6 +306,7 @@ embedding:
|
|
|
306
306
|
provider: sentence-transformers # or "litellm"
|
|
307
307
|
model: sentence-transformers/all-MiniLM-L6-v2
|
|
308
308
|
device: mps # optional: cpu, cuda, mps (auto-detected if omitted)
|
|
309
|
+
min_interval_ms: 300 # optional: pace LiteLLM embedding requests to reduce 429s; defaults to 5 for LiteLLM
|
|
309
310
|
|
|
310
311
|
envs: # extra environment variables for the daemon
|
|
311
312
|
OPENAI_API_KEY: your-key # only needed if not already in your shell environment
|
|
@@ -397,6 +398,7 @@ Set `OLLAMA_API_BASE` in `envs:` if your Ollama server is not at `http://localho
|
|
|
397
398
|
```yaml
|
|
398
399
|
embedding:
|
|
399
400
|
model: text-embedding-3-small
|
|
401
|
+
min_interval_ms: 300 # optional: override the 5ms LiteLLM default
|
|
400
402
|
envs:
|
|
401
403
|
OPENAI_API_KEY: your-api-key
|
|
402
404
|
```
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Daemon filesystem paths and connection helpers.
|
|
2
|
+
|
|
3
|
+
Lightweight module with no cocoindex dependency so that the CLI client
|
|
4
|
+
can import these without pulling in the full daemon stack.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import sys
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from .settings import user_settings_dir
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def daemon_dir() -> Path:
|
|
16
|
+
"""Return the daemon directory (``~/.cocoindex_code/``)."""
|
|
17
|
+
return user_settings_dir()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def connection_family() -> str:
|
|
21
|
+
"""Return the multiprocessing connection family for this platform."""
|
|
22
|
+
return "AF_PIPE" if sys.platform == "win32" else "AF_UNIX"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def daemon_socket_path() -> str:
|
|
26
|
+
"""Return the daemon socket/pipe address."""
|
|
27
|
+
if sys.platform == "win32":
|
|
28
|
+
import hashlib
|
|
29
|
+
|
|
30
|
+
# Hash the daemon dir so COCOINDEX_CODE_DIR overrides create unique pipe names,
|
|
31
|
+
# preventing conflicts between different daemon instances (tests, users, etc.)
|
|
32
|
+
dir_hash = hashlib.md5(str(daemon_dir()).encode()).hexdigest()[:12]
|
|
33
|
+
return rf"\\.\pipe\cocoindex_code_{dir_hash}"
|
|
34
|
+
return str(daemon_dir() / "daemon.sock")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def daemon_pid_path() -> Path:
|
|
38
|
+
"""Return the path for the daemon's PID file."""
|
|
39
|
+
return daemon_dir() / "daemon.pid"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def daemon_log_path() -> Path:
|
|
43
|
+
"""Return the path for the daemon's log file."""
|
|
44
|
+
return daemon_dir() / "daemon.log"
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# file generated by vcs-versioning
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
__all__ = [
|
|
6
|
+
"__version__",
|
|
7
|
+
"__version_tuple__",
|
|
8
|
+
"version",
|
|
9
|
+
"version_tuple",
|
|
10
|
+
"__commit_id__",
|
|
11
|
+
"commit_id",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
version: str
|
|
15
|
+
__version__: str
|
|
16
|
+
__version_tuple__: tuple[int | str, ...]
|
|
17
|
+
version_tuple: tuple[int | str, ...]
|
|
18
|
+
commit_id: str | None
|
|
19
|
+
__commit_id__: str | None
|
|
20
|
+
|
|
21
|
+
__version__ = version = '0.2.22'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 2, 22)
|
|
23
|
+
|
|
24
|
+
__commit_id__ = commit_id = None
|
|
@@ -599,7 +599,7 @@ def doctor() -> None:
|
|
|
599
599
|
|
|
600
600
|
# --- 8. Log files ---
|
|
601
601
|
_print_section("Log Files")
|
|
602
|
-
from .daemon import daemon_log_path as _daemon_log_path
|
|
602
|
+
from ._daemon_paths import daemon_log_path as _daemon_log_path
|
|
603
603
|
|
|
604
604
|
_typer.echo(f" Daemon logs: {_daemon_log_path()}")
|
|
605
605
|
_typer.echo(" Check logs above for further troubleshooting.")
|
|
@@ -675,8 +675,8 @@ def daemon_restart() -> None:
|
|
|
675
675
|
@daemon_app.command("stop")
|
|
676
676
|
def daemon_stop() -> None:
|
|
677
677
|
"""Stop the daemon."""
|
|
678
|
+
from ._daemon_paths import daemon_pid_path
|
|
678
679
|
from .client import is_daemon_running, stop_daemon
|
|
679
|
-
from .daemon import daemon_pid_path
|
|
680
680
|
|
|
681
681
|
pid_path = daemon_pid_path()
|
|
682
682
|
if not pid_path.exists() and not is_daemon_running():
|
|
@@ -17,8 +17,14 @@ from collections.abc import Callable
|
|
|
17
17
|
from multiprocessing.connection import Client, Connection
|
|
18
18
|
from pathlib import Path
|
|
19
19
|
|
|
20
|
+
from ._daemon_paths import (
|
|
21
|
+
connection_family,
|
|
22
|
+
daemon_dir,
|
|
23
|
+
daemon_log_path,
|
|
24
|
+
daemon_pid_path,
|
|
25
|
+
daemon_socket_path,
|
|
26
|
+
)
|
|
20
27
|
from ._version import __version__
|
|
21
|
-
from .daemon import _connection_family, daemon_log_path, daemon_pid_path, daemon_socket_path
|
|
22
28
|
from .protocol import (
|
|
23
29
|
DaemonEnvRequest,
|
|
24
30
|
DaemonEnvResponse,
|
|
@@ -105,7 +111,7 @@ def _raw_connect_and_handshake() -> Connection:
|
|
|
105
111
|
if sys.platform != "win32" and not os.path.exists(sock):
|
|
106
112
|
raise ConnectionRefusedError(f"Daemon socket not found: {sock}")
|
|
107
113
|
try:
|
|
108
|
-
conn = Client(sock, family=_connection_family())
|
|
114
|
+
conn = Client(sock, family=connection_family())
|
|
109
115
|
except (ConnectionRefusedError, FileNotFoundError, OSError) as e:
|
|
110
116
|
raise ConnectionRefusedError(f"Cannot connect to daemon: {e}") from e
|
|
111
117
|
|
|
@@ -329,7 +335,7 @@ def is_daemon_running() -> bool:
|
|
|
329
335
|
"""Check if the daemon is running."""
|
|
330
336
|
if sys.platform == "win32":
|
|
331
337
|
try:
|
|
332
|
-
conn = Client(daemon_socket_path(), family=_connection_family())
|
|
338
|
+
conn = Client(daemon_socket_path(), family=connection_family())
|
|
333
339
|
conn.close()
|
|
334
340
|
return True
|
|
335
341
|
except (ConnectionRefusedError, OSError):
|
|
@@ -343,8 +349,6 @@ def start_daemon() -> subprocess.Popen[bytes]:
|
|
|
343
349
|
Returns the ``Popen`` object so callers can detect early process death
|
|
344
350
|
(via ``proc.poll()``) instead of waiting for a full timeout.
|
|
345
351
|
"""
|
|
346
|
-
from .daemon import daemon_dir, daemon_log_path
|
|
347
|
-
|
|
348
352
|
daemon_dir().mkdir(parents=True, exist_ok=True)
|
|
349
353
|
log_path = daemon_log_path()
|
|
350
354
|
|
|
@@ -518,7 +522,7 @@ def _wait_for_daemon(
|
|
|
518
522
|
|
|
519
523
|
if sys.platform == "win32":
|
|
520
524
|
try:
|
|
521
|
-
conn = Client(sock_path, family=_connection_family())
|
|
525
|
+
conn = Client(sock_path, family=connection_family())
|
|
522
526
|
conn.close()
|
|
523
527
|
return
|
|
524
528
|
except (ConnectionRefusedError, OSError):
|
|
@@ -15,6 +15,13 @@ from multiprocessing.connection import Connection, Listener
|
|
|
15
15
|
from pathlib import Path
|
|
16
16
|
from typing import Any
|
|
17
17
|
|
|
18
|
+
from ._daemon_paths import (
|
|
19
|
+
connection_family,
|
|
20
|
+
daemon_dir,
|
|
21
|
+
daemon_log_path,
|
|
22
|
+
daemon_pid_path,
|
|
23
|
+
daemon_socket_path,
|
|
24
|
+
)
|
|
18
25
|
from ._version import __version__
|
|
19
26
|
from .chunking import ChunkerFn as _ChunkerFn
|
|
20
27
|
from .project import Project
|
|
@@ -53,7 +60,6 @@ from .settings import (
|
|
|
53
60
|
load_project_settings,
|
|
54
61
|
load_user_settings,
|
|
55
62
|
target_sqlite_db_path,
|
|
56
|
-
user_settings_dir,
|
|
57
63
|
)
|
|
58
64
|
from .shared import Embedder, create_embedder
|
|
59
65
|
|
|
@@ -79,43 +85,6 @@ def _resolve_chunker_registry(mappings: list[ChunkerMapping]) -> dict[str, _Chun
|
|
|
79
85
|
return registry
|
|
80
86
|
|
|
81
87
|
|
|
82
|
-
# ---------------------------------------------------------------------------
|
|
83
|
-
# Daemon paths
|
|
84
|
-
# ---------------------------------------------------------------------------
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
def daemon_dir() -> Path:
|
|
88
|
-
"""Return the daemon directory (``~/.cocoindex_code/``)."""
|
|
89
|
-
return user_settings_dir()
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
def _connection_family() -> str:
|
|
93
|
-
"""Return the multiprocessing connection family for this platform."""
|
|
94
|
-
return "AF_PIPE" if sys.platform == "win32" else "AF_UNIX"
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
def daemon_socket_path() -> str:
|
|
98
|
-
"""Return the daemon socket/pipe address."""
|
|
99
|
-
if sys.platform == "win32":
|
|
100
|
-
import hashlib
|
|
101
|
-
|
|
102
|
-
# Hash the daemon dir so COCOINDEX_CODE_DIR overrides create unique pipe names,
|
|
103
|
-
# preventing conflicts between different daemon instances (tests, users, etc.)
|
|
104
|
-
dir_hash = hashlib.md5(str(daemon_dir()).encode()).hexdigest()[:12]
|
|
105
|
-
return rf"\\.\pipe\cocoindex_code_{dir_hash}"
|
|
106
|
-
return str(daemon_dir() / "daemon.sock")
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
def daemon_pid_path() -> Path:
|
|
110
|
-
"""Return the path for the daemon's PID file."""
|
|
111
|
-
return daemon_dir() / "daemon.pid"
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
def daemon_log_path() -> Path:
|
|
115
|
-
"""Return the path for the daemon's log file."""
|
|
116
|
-
return daemon_dir() / "daemon.log"
|
|
117
|
-
|
|
118
|
-
|
|
119
88
|
# ---------------------------------------------------------------------------
|
|
120
89
|
# Project Registry
|
|
121
90
|
# ---------------------------------------------------------------------------
|
|
@@ -540,7 +509,7 @@ def run_daemon() -> None:
|
|
|
540
509
|
except Exception:
|
|
541
510
|
pass
|
|
542
511
|
|
|
543
|
-
listener = Listener(sock_path, family=_connection_family())
|
|
512
|
+
listener = Listener(sock_path, family=connection_family())
|
|
544
513
|
logger.info("Listening on %s", sock_path)
|
|
545
514
|
|
|
546
515
|
loop = asyncio.new_event_loop()
|
|
@@ -224,5 +224,6 @@ async def indexer_main() -> None:
|
|
|
224
224
|
path_matcher=matcher,
|
|
225
225
|
)
|
|
226
226
|
|
|
227
|
-
|
|
228
|
-
|
|
227
|
+
await coco.mount_each(
|
|
228
|
+
coco.component_subpath(coco.Symbol("process_file")), process_file, files.items(), table
|
|
229
|
+
)
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""LiteLLM embedder with optional request pacing and rate-limit retries."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
import re
|
|
8
|
+
import time
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
import cocoindex as coco
|
|
12
|
+
import numpy as np
|
|
13
|
+
from cocoindex.ops.litellm import LiteLLMEmbedder, litellm
|
|
14
|
+
from numpy.typing import NDArray
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
_RATE_LIMIT_DELAY_RE = re.compile(r"Please try again in ([0-9.]+)(ms|s)", re.IGNORECASE)
|
|
19
|
+
_MAX_RATE_LIMIT_RETRIES = 6
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _get_rate_limit_delay(exc: Exception, attempt: int) -> float | None:
|
|
23
|
+
message = str(exc)
|
|
24
|
+
if "rate limit" not in message.lower():
|
|
25
|
+
return None
|
|
26
|
+
|
|
27
|
+
match = _RATE_LIMIT_DELAY_RE.search(message)
|
|
28
|
+
if match is not None:
|
|
29
|
+
value = float(match.group(1))
|
|
30
|
+
unit = match.group(2).lower()
|
|
31
|
+
delay = value / 1000.0 if unit == "ms" else value
|
|
32
|
+
else:
|
|
33
|
+
delay = min(0.5 * (2**attempt), 10.0)
|
|
34
|
+
|
|
35
|
+
return min(delay + 0.1, 10.0)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class PacedLiteLLMEmbedder(LiteLLMEmbedder):
|
|
39
|
+
"""LiteLLM embedder that serializes requests and paces them when configured."""
|
|
40
|
+
|
|
41
|
+
def __init__(self, model: str, *, min_interval_ms: int | None = None, **kwargs: Any) -> None:
|
|
42
|
+
super().__init__(model, **kwargs)
|
|
43
|
+
self._min_request_interval_seconds = max(0.0, float(min_interval_ms or 0) / 1000.0)
|
|
44
|
+
self._request_lock: asyncio.Lock | None = None
|
|
45
|
+
self._next_request_at: float = 0.0
|
|
46
|
+
|
|
47
|
+
def _get_request_lock(self) -> asyncio.Lock:
|
|
48
|
+
if self._request_lock is None:
|
|
49
|
+
self._request_lock = asyncio.Lock()
|
|
50
|
+
return self._request_lock
|
|
51
|
+
|
|
52
|
+
async def _aembedding_with_rate_limit_retries(
|
|
53
|
+
self, *, model: str, input: list[str], **kwargs: Any
|
|
54
|
+
) -> Any:
|
|
55
|
+
last_exc: Exception | None = None
|
|
56
|
+
|
|
57
|
+
for attempt in range(_MAX_RATE_LIMIT_RETRIES):
|
|
58
|
+
try:
|
|
59
|
+
return await litellm.aembedding(model=model, input=input, **kwargs)
|
|
60
|
+
except Exception as exc: # noqa: BLE001
|
|
61
|
+
delay = _get_rate_limit_delay(exc, attempt)
|
|
62
|
+
last_exc = exc
|
|
63
|
+
if delay is None or attempt == _MAX_RATE_LIMIT_RETRIES - 1:
|
|
64
|
+
raise
|
|
65
|
+
|
|
66
|
+
logger.warning(
|
|
67
|
+
"Embedding rate limited for model %s, retrying in %.3fs (attempt %d/%d)",
|
|
68
|
+
model,
|
|
69
|
+
delay,
|
|
70
|
+
attempt + 1,
|
|
71
|
+
_MAX_RATE_LIMIT_RETRIES,
|
|
72
|
+
)
|
|
73
|
+
await asyncio.sleep(delay)
|
|
74
|
+
|
|
75
|
+
assert last_exc is not None
|
|
76
|
+
raise last_exc
|
|
77
|
+
|
|
78
|
+
async def run_embedding_request(self, *, input: list[str], **kwargs: Any) -> Any:
|
|
79
|
+
lock = self._get_request_lock()
|
|
80
|
+
async with lock:
|
|
81
|
+
now = time.monotonic()
|
|
82
|
+
if self._next_request_at > now:
|
|
83
|
+
await asyncio.sleep(self._next_request_at - now)
|
|
84
|
+
|
|
85
|
+
response = await self._aembedding_with_rate_limit_retries(
|
|
86
|
+
model=self._model,
|
|
87
|
+
input=input,
|
|
88
|
+
**kwargs,
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
now = time.monotonic()
|
|
92
|
+
if self._min_request_interval_seconds > 0:
|
|
93
|
+
self._next_request_at = now + self._min_request_interval_seconds
|
|
94
|
+
else:
|
|
95
|
+
self._next_request_at = now
|
|
96
|
+
|
|
97
|
+
return response
|
|
98
|
+
|
|
99
|
+
async def _get_dim(self) -> int:
|
|
100
|
+
if self._dim is not None:
|
|
101
|
+
return self._dim
|
|
102
|
+
async with self._get_lock():
|
|
103
|
+
if self._dim is not None:
|
|
104
|
+
return self._dim
|
|
105
|
+
response = await self.run_embedding_request(input=["hello"], **self._kwargs)
|
|
106
|
+
embedding = response.data[0]["embedding"]
|
|
107
|
+
self._dim = len(embedding)
|
|
108
|
+
return self._dim
|
|
109
|
+
|
|
110
|
+
@coco.fn.as_async(
|
|
111
|
+
batching=True,
|
|
112
|
+
max_batch_size=64,
|
|
113
|
+
memo=True,
|
|
114
|
+
version=1,
|
|
115
|
+
logic_tracking="self",
|
|
116
|
+
)
|
|
117
|
+
async def embed(
|
|
118
|
+
self,
|
|
119
|
+
texts: list[str],
|
|
120
|
+
input_type: str | None = None,
|
|
121
|
+
) -> list[NDArray[np.float32]]:
|
|
122
|
+
kwargs = dict(self._kwargs)
|
|
123
|
+
if input_type is not None:
|
|
124
|
+
kwargs["input_type"] = input_type
|
|
125
|
+
response = await self.run_embedding_request(input=texts, **kwargs)
|
|
126
|
+
return [np.array(item["embedding"], dtype=np.float32) for item in response.data]
|
|
@@ -92,6 +92,7 @@ class EmbeddingSettings:
|
|
|
92
92
|
model: str
|
|
93
93
|
provider: str = "litellm"
|
|
94
94
|
device: str | None = None
|
|
95
|
+
min_interval_ms: int | None = None
|
|
95
96
|
|
|
96
97
|
|
|
97
98
|
@dataclass
|
|
@@ -351,6 +352,8 @@ def _user_settings_to_dict(settings: UserSettings) -> dict[str, Any]:
|
|
|
351
352
|
}
|
|
352
353
|
if settings.embedding.device is not None:
|
|
353
354
|
emb["device"] = settings.embedding.device
|
|
355
|
+
if settings.embedding.min_interval_ms is not None:
|
|
356
|
+
emb["min_interval_ms"] = settings.embedding.min_interval_ms
|
|
354
357
|
d["embedding"] = emb
|
|
355
358
|
if settings.envs:
|
|
356
359
|
d["envs"] = dict(settings.envs)
|
|
@@ -367,6 +370,8 @@ def _user_settings_from_dict(d: dict[str, Any]) -> UserSettings:
|
|
|
367
370
|
emb_kwargs["provider"] = emb_dict["provider"]
|
|
368
371
|
if "device" in emb_dict:
|
|
369
372
|
emb_kwargs["device"] = emb_dict["device"]
|
|
373
|
+
if "min_interval_ms" in emb_dict:
|
|
374
|
+
emb_kwargs["min_interval_ms"] = emb_dict["min_interval_ms"]
|
|
370
375
|
embedding = EmbeddingSettings(**emb_kwargs)
|
|
371
376
|
envs = d.get("envs", {})
|
|
372
377
|
return UserSettings(embedding=embedding, envs=envs)
|
|
@@ -21,6 +21,7 @@ from .settings import EmbeddingSettings
|
|
|
21
21
|
logger = logging.getLogger(__name__)
|
|
22
22
|
|
|
23
23
|
SBERT_PREFIX = "sbert/"
|
|
24
|
+
DEFAULT_LITELLM_MIN_INTERVAL_MS = 5
|
|
24
25
|
|
|
25
26
|
# Models that define a "query" prompt for asymmetric retrieval.
|
|
26
27
|
_QUERY_PROMPT_MODELS = {"nomic-ai/nomic-embed-code", "nomic-ai/CodeRankEmbed"}
|
|
@@ -63,11 +64,23 @@ def create_embedder(settings: EmbeddingSettings) -> Embedder:
|
|
|
63
64
|
)
|
|
64
65
|
logger.info("Embedding model: %s | device: %s", settings.model, settings.device)
|
|
65
66
|
else:
|
|
66
|
-
from
|
|
67
|
+
from .litellm_embedder import PacedLiteLLMEmbedder
|
|
67
68
|
|
|
68
|
-
|
|
69
|
+
min_interval_ms = (
|
|
70
|
+
settings.min_interval_ms
|
|
71
|
+
if settings.min_interval_ms is not None
|
|
72
|
+
else DEFAULT_LITELLM_MIN_INTERVAL_MS
|
|
73
|
+
)
|
|
74
|
+
instance = PacedLiteLLMEmbedder(
|
|
75
|
+
settings.model,
|
|
76
|
+
min_interval_ms=min_interval_ms,
|
|
77
|
+
)
|
|
69
78
|
query_prompt_name = None
|
|
70
|
-
logger.info(
|
|
79
|
+
logger.info(
|
|
80
|
+
"Embedding model (LiteLLM): %s | min_interval_ms: %s",
|
|
81
|
+
settings.model,
|
|
82
|
+
min_interval_ms,
|
|
83
|
+
)
|
|
71
84
|
|
|
72
85
|
embedder = instance
|
|
73
86
|
return instance
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
# file generated by setuptools-scm
|
|
2
|
-
# don't change, don't track in version control
|
|
3
|
-
|
|
4
|
-
__all__ = [
|
|
5
|
-
"__version__",
|
|
6
|
-
"__version_tuple__",
|
|
7
|
-
"version",
|
|
8
|
-
"version_tuple",
|
|
9
|
-
"__commit_id__",
|
|
10
|
-
"commit_id",
|
|
11
|
-
]
|
|
12
|
-
|
|
13
|
-
TYPE_CHECKING = False
|
|
14
|
-
if TYPE_CHECKING:
|
|
15
|
-
from typing import Tuple
|
|
16
|
-
from typing import Union
|
|
17
|
-
|
|
18
|
-
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
-
COMMIT_ID = Union[str, None]
|
|
20
|
-
else:
|
|
21
|
-
VERSION_TUPLE = object
|
|
22
|
-
COMMIT_ID = object
|
|
23
|
-
|
|
24
|
-
version: str
|
|
25
|
-
__version__: str
|
|
26
|
-
__version_tuple__: VERSION_TUPLE
|
|
27
|
-
version_tuple: VERSION_TUPLE
|
|
28
|
-
commit_id: COMMIT_ID
|
|
29
|
-
__commit_id__: COMMIT_ID
|
|
30
|
-
|
|
31
|
-
__version__ = version = '0.2.10'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 2, 10)
|
|
33
|
-
|
|
34
|
-
__commit_id__ = commit_id = None
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|