remdb-0.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- rem/__init__.py +2 -0
- rem/agentic/README.md +650 -0
- rem/agentic/__init__.py +39 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +8 -0
- rem/agentic/context.py +148 -0
- rem/agentic/context_builder.py +329 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +107 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +151 -0
- rem/agentic/providers/phoenix.py +674 -0
- rem/agentic/providers/pydantic_ai.py +572 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +396 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +231 -0
- rem/api/README.md +420 -0
- rem/api/main.py +324 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +536 -0
- rem/api/mcp_router/server.py +213 -0
- rem/api/mcp_router/tools.py +584 -0
- rem/api/routers/auth.py +229 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/completions.py +281 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +124 -0
- rem/api/routers/chat/streaming.py +185 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +26 -0
- rem/auth/middleware.py +100 -0
- rem/auth/providers/__init__.py +13 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +455 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +126 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +566 -0
- rem/cli/commands/configure.py +497 -0
- rem/cli/commands/db.py +493 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1302 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +245 -0
- rem/cli/commands/schema.py +183 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +96 -0
- rem/config.py +237 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +64 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +628 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +243 -0
- rem/models/entities/__init__.py +43 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +35 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +191 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/user.py +85 -0
- rem/py.typed +0 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +128 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +16 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +806 -0
- rem/services/content/service.py +676 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +336 -0
- rem/services/dreaming/moment_service.py +264 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +120 -0
- rem/services/embeddings/worker.py +421 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +686 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +575 -0
- rem/services/postgres/__init__.py +23 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
- rem/services/postgres/register_type.py +352 -0
- rem/services/postgres/repository.py +337 -0
- rem/services/postgres/schema_generator.py +379 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +354 -0
- rem/services/rem/README.md +304 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +145 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +527 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +6 -0
- rem/services/session/compression.py +360 -0
- rem/services/session/reload.py +77 -0
- rem/settings.py +1235 -0
- rem/sql/002_install_models.sql +1068 -0
- rem/sql/background_indexes.sql +42 -0
- rem/sql/install_models.sql +1038 -0
- rem/sql/migrations/001_install.sql +503 -0
- rem/sql/migrations/002_install_models.sql +1202 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +583 -0
- rem/utils/__init__.py +43 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +423 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/markdown.py +16 -0
- rem/utils/model_helpers.py +236 -0
- rem/utils/schema_loader.py +336 -0
- rem/utils/sql_types.py +348 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +330 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +5 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- remdb-0.3.0.dist-info/METADATA +1455 -0
- remdb-0.3.0.dist-info/RECORD +187 -0
- remdb-0.3.0.dist-info/WHEEL +4 -0
- remdb-0.3.0.dist-info/entry_points.txt +2 -0
rem/utils/embeddings.py
ADDED
@@ -0,0 +1,423 @@

"""
Embeddings utility for generating vector embeddings using various providers.

Uses requests library for HTTP calls (no provider SDKs required).
Supports batch processing to optimize API usage and respect rate limits.
Uses tenacity for automatic retry with exponential backoff.

Supported Providers:
- OpenAI: text-embedding-3-small, text-embedding-3-large, text-embedding-ada-002
- Anthropic: voyage-2 (via Voyage AI)

Usage:
    from rem.utils.embeddings import generate_embeddings

    # Single text
    embedding = generate_embeddings("openai:text-embedding-3-small", "Hello world")

    # Batch processing
    texts = ["Hello world", "How are you?", "Good morning"]
    embeddings = generate_embeddings("openai:text-embedding-3-small", texts)
"""

import os
from typing import Any, cast

import requests
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)


class EmbeddingError(Exception):
    """Base exception for embedding generation errors."""

    pass


class RateLimitError(EmbeddingError):
    """Raised when rate limit is exceeded."""

    pass


def generate_embeddings(
    embedding_provider: str,
    texts: str | list[str],
    api_key: str | None = None,
    max_retries: int = 1,
) -> list[float] | list[list[float]]:
    """
    Generate embeddings for text(s) using specified provider.

    Uses tenacity for automatic retry with exponential backoff on rate limits.

    Args:
        embedding_provider: Provider and model in format "provider:model_name"
            (e.g., "openai:text-embedding-3-small")
        texts: Single text string or list of texts to embed
        api_key: API key for the provider. If None, reads from environment variables:
            - OpenAI: OPENAI_API_KEY or LLM__OPENAI_API_KEY
            - Anthropic: ANTHROPIC_API_KEY or LLM__ANTHROPIC_API_KEY
        max_retries: Maximum number of retry attempts for rate limits (default: 1)

    Returns:
        - If single text: list[float] (single embedding vector)
        - If list of texts: list[list[float]] (list of embedding vectors)

    Raises:
        EmbeddingError: If embedding generation fails
        RateLimitError: If rate limit exceeded after retries
        ValueError: If provider format is invalid

    Examples:
        >>> embedding = generate_embeddings("openai:text-embedding-3-small", "Hello")
        >>> len(embedding)
        1536

        >>> embeddings = generate_embeddings(
        ...     "openai:text-embedding-3-small",
        ...     ["Hello", "World"]
        ... )
        >>> len(embeddings)
        2
    """
    # Parse provider format
    if ":" not in embedding_provider:
        raise ValueError(
            f"Invalid embedding_provider format: {embedding_provider}. "
            f"Expected format: 'provider:model_name'"
        )

    provider, model_name = embedding_provider.split(":", 1)
    provider = provider.lower()

    # Normalize input to list
    if isinstance(texts, str):
        text_list: list[str] = [texts]
        is_single = True
    else:
        text_list = texts
        is_single = False

    # Validate input
    if not text_list:
        raise ValueError("texts cannot be empty")

    # Get API key from environment if not provided
    if api_key is None:
        api_key = _get_api_key(provider)

    # Generate embeddings (tenacity handles retries)
    if provider == "openai":
        embeddings = _generate_openai_embeddings_with_retry(
            model_name, text_list, api_key, max_retries
        )
    elif provider == "anthropic":
        # Anthropic uses Voyage AI for embeddings
        embeddings = _generate_voyage_embeddings_with_retry(
            model_name, text_list, api_key, max_retries
        )
    else:
        raise ValueError(f"Unsupported embedding provider: {provider}")

    # Return single embedding or list based on input
    return embeddings[0] if is_single else embeddings


def _get_api_key(provider: str) -> str:
    """
    Get API key from environment variables.

    Args:
        provider: Provider name (openai, anthropic)

    Returns:
        API key string

    Raises:
        ValueError: If API key not found in environment
    """
    from ..settings import settings

    if provider == "openai":
        api_key = settings.llm.openai_api_key
        if not api_key:
            raise ValueError(
                "OpenAI API key not found. Set LLM__OPENAI_API_KEY environment variable."
            )
        return api_key
    elif provider == "anthropic":
        api_key = settings.llm.anthropic_api_key
        if not api_key:
            raise ValueError(
                "Anthropic API key not found. Set LLM__ANTHROPIC_API_KEY environment variable."
            )
        return api_key
    else:
        raise ValueError(f"Unknown provider: {provider}")


def _create_retry_decorator(max_retries: int):
    """Create a retry decorator with exponential backoff."""
    return retry(
        retry=retry_if_exception_type(RateLimitError),
        stop=stop_after_attempt(max_retries),
        wait=wait_exponential(multiplier=1, min=1, max=60),
        reraise=True,
    )


def _generate_openai_embeddings_with_retry(
    model: str, texts: list[str], api_key: str, max_retries: int
) -> list[list[float]]:
    """
    Generate embeddings using OpenAI API with automatic retry.

    Uses tenacity for exponential backoff on rate limits.

    Args:
        model: OpenAI model name (e.g., "text-embedding-3-small")
        texts: List of texts to embed
        api_key: OpenAI API key
        max_retries: Maximum number of retry attempts

    Returns:
        List of embedding vectors

    Raises:
        EmbeddingError: If API request fails
        RateLimitError: If rate limit exceeded after retries
    """
    # Create retry decorator dynamically based on max_retries
    retry_decorator = _create_retry_decorator(max_retries)

    @retry_decorator
    def _call_api():
        return _generate_openai_embeddings(model, texts, api_key)

    return cast(list[list[float]], _call_api())


def _generate_openai_embeddings(
    model: str, texts: list[str], api_key: str
) -> list[list[float]]:
    """
    Generate embeddings using OpenAI API (internal, no retry).

    API Docs: https://platform.openai.com/docs/api-reference/embeddings

    Args:
        model: OpenAI model name (e.g., "text-embedding-3-small")
        texts: List of texts to embed
        api_key: OpenAI API key

    Returns:
        List of embedding vectors

    Raises:
        EmbeddingError: If API request fails
        RateLimitError: If rate limit exceeded
    """
    url = "https://api.openai.com/v1/embeddings"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": model,
        "input": texts,
    }

    try:
        response = requests.post(url, json=payload, headers=headers, timeout=60)

        # Handle rate limits
        if response.status_code == 429:
            raise RateLimitError(
                f"OpenAI rate limit exceeded: {response.json().get('error', {}).get('message', 'Unknown error')}"
            )

        # Handle other errors
        if response.status_code != 200:
            error_msg = response.json().get("error", {}).get("message", "Unknown error")
            raise EmbeddingError(
                f"OpenAI API error (status {response.status_code}): {error_msg}"
            )

        # Extract embeddings from response
        data = response.json()
        embeddings_data = data.get("data", [])

        # Sort by index to maintain order (API may return out of order)
        embeddings_data.sort(key=lambda x: x.get("index", 0))

        embeddings = [item["embedding"] for item in embeddings_data]

        if len(embeddings) != len(texts):
            raise EmbeddingError(
                f"Expected {len(texts)} embeddings, got {len(embeddings)}"
            )

        return embeddings

    except requests.exceptions.Timeout:
        raise EmbeddingError("OpenAI API request timed out")
    except requests.exceptions.RequestException as e:
        raise EmbeddingError(f"OpenAI API request failed: {str(e)}")


def _generate_voyage_embeddings_with_retry(
    model: str, texts: list[str], api_key: str, max_retries: int
) -> list[list[float]]:
    """
    Generate embeddings using Voyage AI API with automatic retry.

    Uses tenacity for exponential backoff on rate limits.

    Args:
        model: Voyage model name (e.g., "voyage-2")
        texts: List of texts to embed
        api_key: Voyage AI API key
        max_retries: Maximum number of retry attempts

    Returns:
        List of embedding vectors

    Raises:
        EmbeddingError: If API request fails
        RateLimitError: If rate limit exceeded after retries
    """
    # Create retry decorator dynamically based on max_retries
    retry_decorator = _create_retry_decorator(max_retries)

    @retry_decorator
    def _call_api():
        return _generate_voyage_embeddings(model, texts, api_key)

    return cast(list[list[float]], _call_api())


def _generate_voyage_embeddings(
    model: str, texts: list[str], api_key: str
) -> list[list[float]]:
    """
    Generate embeddings using Voyage AI API (internal, no retry).

    API Docs: https://docs.voyageai.com/docs/embeddings

    Args:
        model: Voyage model name (e.g., "voyage-2")
        texts: List of texts to embed
        api_key: Voyage AI API key

    Returns:
        List of embedding vectors

    Raises:
        EmbeddingError: If API request fails
        RateLimitError: If rate limit exceeded
    """
    url = "https://api.voyageai.com/v1/embeddings"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": model,
        "input": texts,
    }

    try:
        response = requests.post(url, json=payload, headers=headers, timeout=60)

        # Handle rate limits
        if response.status_code == 429:
            raise RateLimitError(
                f"Voyage AI rate limit exceeded: {response.json().get('error', {}).get('message', 'Unknown error')}"
            )

        # Handle other errors
        if response.status_code != 200:
            error_msg = response.json().get("error", {}).get("message", "Unknown error")
            raise EmbeddingError(
                f"Voyage AI API error (status {response.status_code}): {error_msg}"
            )

        # Extract embeddings from response
        data = response.json()
        embeddings_data = data.get("data", [])

        embeddings = [item["embedding"] for item in embeddings_data]

        if len(embeddings) != len(texts):
            raise EmbeddingError(
                f"Expected {len(texts)} embeddings, got {len(embeddings)}"
            )

        return embeddings

    except requests.exceptions.Timeout:
        raise EmbeddingError("Voyage AI API request timed out")
    except requests.exceptions.RequestException as e:
        raise EmbeddingError(f"Voyage AI API request failed: {str(e)}")


def get_embedding_dimension(embedding_provider: str) -> int:
    """
    Get embedding dimension for a given provider and model.

    Args:
        embedding_provider: Provider and model in format "provider:model_name"

    Returns:
        Embedding dimension (vector length)

    Raises:
        ValueError: If provider/model is unknown

    Examples:
        >>> get_embedding_dimension("openai:text-embedding-3-small")
        1536
        >>> get_embedding_dimension("openai:text-embedding-3-large")
        3072
    """
    if ":" not in embedding_provider:
        raise ValueError(
            f"Invalid embedding_provider format: {embedding_provider}. "
            f"Expected format: 'provider:model_name'"
        )

    provider, model_name = embedding_provider.split(":", 1)
    provider = provider.lower()

    # OpenAI dimensions
    openai_dimensions = {
        "text-embedding-3-small": 1536,
        "text-embedding-3-large": 3072,
        "text-embedding-ada-002": 1536,
    }

    # Voyage AI dimensions
    voyage_dimensions = {
        "voyage-2": 1024,
        "voyage-large-2": 1536,
        "voyage-code-2": 1536,
    }

    if provider == "openai":
        if model_name in openai_dimensions:
            return openai_dimensions[model_name]
        raise ValueError(f"Unknown OpenAI model: {model_name}")
    elif provider == "anthropic":
        # Anthropic uses Voyage AI
        if model_name in voyage_dimensions:
            return voyage_dimensions[model_name]
        raise ValueError(f"Unknown Voyage AI model: {model_name}")
    else:
        raise ValueError(f"Unknown provider: {provider}")
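
The two public helpers in this module are typically used together. A minimal sketch (illustrative only, not part of the packaged files; it assumes LLM__OPENAI_API_KEY is set so the key is resolved from settings):

    from rem.utils.embeddings import generate_embeddings, get_embedding_dimension

    # Batch-embed a few short documents; max_retries controls tenacity's backoff on 429s
    docs = ["Hello world", "How are you?", "Good morning"]
    vectors = generate_embeddings("openai:text-embedding-3-small", docs, max_retries=3)

    # Each returned vector's length should match the model's advertised dimension
    dim = get_embedding_dimension("openai:text-embedding-3-small")  # 1536
    assert all(len(v) == dim for v in vectors)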