hindsight-api 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/admin/__init__.py +1 -0
- hindsight_api/admin/cli.py +311 -0
- hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
- hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
- hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +112 -0
- hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +41 -0
- hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +95 -0
- hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +58 -0
- hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +109 -0
- hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +41 -0
- hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +134 -0
- hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +113 -0
- hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +194 -0
- hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +50 -0
- hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +47 -0
- hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +53 -0
- hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +134 -0
- hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +41 -0
- hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +50 -0
- hindsight_api/api/http.py +1406 -118
- hindsight_api/api/mcp.py +11 -196
- hindsight_api/config.py +359 -27
- hindsight_api/engine/consolidation/__init__.py +5 -0
- hindsight_api/engine/consolidation/consolidator.py +859 -0
- hindsight_api/engine/consolidation/prompts.py +69 -0
- hindsight_api/engine/cross_encoder.py +706 -88
- hindsight_api/engine/db_budget.py +284 -0
- hindsight_api/engine/db_utils.py +11 -0
- hindsight_api/engine/directives/__init__.py +5 -0
- hindsight_api/engine/directives/models.py +37 -0
- hindsight_api/engine/embeddings.py +553 -29
- hindsight_api/engine/entity_resolver.py +8 -5
- hindsight_api/engine/interface.py +40 -17
- hindsight_api/engine/llm_wrapper.py +744 -68
- hindsight_api/engine/memory_engine.py +2505 -1017
- hindsight_api/engine/mental_models/__init__.py +14 -0
- hindsight_api/engine/mental_models/models.py +53 -0
- hindsight_api/engine/query_analyzer.py +4 -3
- hindsight_api/engine/reflect/__init__.py +18 -0
- hindsight_api/engine/reflect/agent.py +933 -0
- hindsight_api/engine/reflect/models.py +109 -0
- hindsight_api/engine/reflect/observations.py +186 -0
- hindsight_api/engine/reflect/prompts.py +483 -0
- hindsight_api/engine/reflect/tools.py +437 -0
- hindsight_api/engine/reflect/tools_schema.py +250 -0
- hindsight_api/engine/response_models.py +168 -4
- hindsight_api/engine/retain/bank_utils.py +79 -201
- hindsight_api/engine/retain/fact_extraction.py +424 -195
- hindsight_api/engine/retain/fact_storage.py +35 -12
- hindsight_api/engine/retain/link_utils.py +29 -24
- hindsight_api/engine/retain/orchestrator.py +24 -43
- hindsight_api/engine/retain/types.py +11 -2
- hindsight_api/engine/search/graph_retrieval.py +43 -14
- hindsight_api/engine/search/link_expansion_retrieval.py +391 -0
- hindsight_api/engine/search/mpfp_retrieval.py +362 -117
- hindsight_api/engine/search/reranking.py +2 -2
- hindsight_api/engine/search/retrieval.py +848 -201
- hindsight_api/engine/search/tags.py +172 -0
- hindsight_api/engine/search/think_utils.py +42 -141
- hindsight_api/engine/search/trace.py +12 -1
- hindsight_api/engine/search/tracer.py +26 -6
- hindsight_api/engine/search/types.py +21 -3
- hindsight_api/engine/task_backend.py +113 -106
- hindsight_api/engine/utils.py +1 -152
- hindsight_api/extensions/__init__.py +10 -1
- hindsight_api/extensions/builtin/tenant.py +5 -1
- hindsight_api/extensions/context.py +10 -1
- hindsight_api/extensions/operation_validator.py +81 -4
- hindsight_api/extensions/tenant.py +26 -0
- hindsight_api/main.py +69 -6
- hindsight_api/mcp_local.py +12 -53
- hindsight_api/mcp_tools.py +494 -0
- hindsight_api/metrics.py +433 -48
- hindsight_api/migrations.py +141 -1
- hindsight_api/models.py +3 -3
- hindsight_api/pg0.py +53 -0
- hindsight_api/server.py +39 -2
- hindsight_api/worker/__init__.py +11 -0
- hindsight_api/worker/main.py +296 -0
- hindsight_api/worker/poller.py +486 -0
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/METADATA +16 -6
- hindsight_api-0.4.0.dist-info/RECORD +112 -0
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/entry_points.txt +2 -0
- hindsight_api/engine/retain/observation_regeneration.py +0 -254
- hindsight_api/engine/search/observation_utils.py +0 -125
- hindsight_api/engine/search/scoring.py +0 -159
- hindsight_api-0.2.1.dist-info/RECORD +0 -75
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/WHEEL +0 -0
|
@@ -3,8 +3,8 @@ Embeddings abstraction for the memory system.
|
|
|
3
3
|
|
|
4
4
|
Provides an interface for generating embeddings with different backends.
|
|
5
5
|
|
|
6
|
-
|
|
7
|
-
|
|
6
|
+
The embedding dimension is auto-detected from the model at initialization.
|
|
7
|
+
The database schema is automatically adjusted to match the model's dimension.
|
|
8
8
|
|
|
9
9
|
Configuration via environment variables - see hindsight_api.config for all env var names.
|
|
10
10
|
"""
|
|
@@ -16,12 +16,25 @@ from abc import ABC, abstractmethod
|
|
|
16
16
|
import httpx
|
|
17
17
|
|
|
18
18
|
from ..config import (
|
|
19
|
+
DEFAULT_EMBEDDINGS_COHERE_MODEL,
|
|
20
|
+
DEFAULT_EMBEDDINGS_LITELLM_MODEL,
|
|
19
21
|
DEFAULT_EMBEDDINGS_LOCAL_MODEL,
|
|
22
|
+
DEFAULT_EMBEDDINGS_OPENAI_MODEL,
|
|
20
23
|
DEFAULT_EMBEDDINGS_PROVIDER,
|
|
21
|
-
|
|
24
|
+
DEFAULT_LITELLM_API_BASE,
|
|
25
|
+
ENV_COHERE_API_KEY,
|
|
26
|
+
ENV_EMBEDDINGS_COHERE_BASE_URL,
|
|
27
|
+
ENV_EMBEDDINGS_COHERE_MODEL,
|
|
28
|
+
ENV_EMBEDDINGS_LITELLM_MODEL,
|
|
22
29
|
ENV_EMBEDDINGS_LOCAL_MODEL,
|
|
30
|
+
ENV_EMBEDDINGS_OPENAI_API_KEY,
|
|
31
|
+
ENV_EMBEDDINGS_OPENAI_BASE_URL,
|
|
32
|
+
ENV_EMBEDDINGS_OPENAI_MODEL,
|
|
23
33
|
ENV_EMBEDDINGS_PROVIDER,
|
|
24
34
|
ENV_EMBEDDINGS_TEI_URL,
|
|
35
|
+
ENV_LITELLM_API_BASE,
|
|
36
|
+
ENV_LITELLM_API_KEY,
|
|
37
|
+
ENV_LLM_API_KEY,
|
|
25
38
|
)
|
|
26
39
|
|
|
27
40
|
logger = logging.getLogger(__name__)
|
|
@@ -31,8 +44,8 @@ class Embeddings(ABC):
|
|
|
31
44
|
"""
|
|
32
45
|
Abstract base class for embedding generation.
|
|
33
46
|
|
|
34
|
-
|
|
35
|
-
|
|
47
|
+
The embedding dimension is determined by the model and detected at initialization.
|
|
48
|
+
The database schema is automatically adjusted to match the model's dimension.
|
|
36
49
|
"""
|
|
37
50
|
|
|
38
51
|
@property
|
|
@@ -41,6 +54,12 @@ class Embeddings(ABC):
|
|
|
41
54
|
"""Return a human-readable name for this provider (e.g., 'local', 'tei')."""
|
|
42
55
|
pass
|
|
43
56
|
|
|
57
|
+
@property
@abstractmethod
def dimension(self) -> int:
    """Report the size of the embedding vectors produced by this model."""
    ...
|
|
62
|
+
|
|
44
63
|
@abstractmethod
|
|
45
64
|
async def initialize(self) -> None:
|
|
46
65
|
"""
|
|
@@ -54,13 +73,13 @@ class Embeddings(ABC):
|
|
|
54
73
|
@abstractmethod
def encode(self, texts: list[str]) -> list[list[float]]:
    """
    Produce embedding vectors for a batch of texts.

    Args:
        texts: The text strings to encode.

    Returns:
        A list of embedding vectors, each one a list of floats.
    """
    ...
|
|
66
85
|
|
|
@@ -70,9 +89,7 @@ class LocalSTEmbeddings(Embeddings):
|
|
|
70
89
|
Local embeddings implementation using SentenceTransformers.
|
|
71
90
|
|
|
72
91
|
Call initialize() during startup to load the model and avoid cold starts.
|
|
73
|
-
|
|
74
|
-
Default model is BAAI/bge-small-en-v1.5 which produces 384-dimensional
|
|
75
|
-
embeddings matching the database schema.
|
|
92
|
+
The embedding dimension is auto-detected from the model.
|
|
76
93
|
"""
|
|
77
94
|
|
|
78
95
|
def __init__(self, model_name: str | None = None):
|
|
@@ -81,16 +98,22 @@ class LocalSTEmbeddings(Embeddings):
|
|
|
81
98
|
|
|
82
99
|
Args:
|
|
83
100
|
model_name: Name of the SentenceTransformer model to use.
|
|
84
|
-
Must produce 384-dimensional embeddings.
|
|
85
101
|
Default: BAAI/bge-small-en-v1.5
|
|
86
102
|
"""
|
|
87
103
|
self.model_name = model_name or DEFAULT_EMBEDDINGS_LOCAL_MODEL
|
|
88
104
|
self._model = None
|
|
105
|
+
self._dimension: int | None = None
|
|
89
106
|
|
|
90
107
|
@property
def provider_name(self) -> str:
    """Identify this backend as the in-process SentenceTransformers provider."""
    name = "local"
    return name
|
|
93
110
|
|
|
111
|
+
@property
def dimension(self) -> int:
    """
    Return the embedding dimension cached on this instance.

    Raises:
        RuntimeError: If no dimension has been stored yet (``_dimension`` is None).
    """
    detected = self._dimension
    if detected is not None:
        return detected
    raise RuntimeError("Embeddings not initialized. Call initialize() first.")
|
|
116
|
+
|
|
94
117
|
async def initialize(self) -> None:
|
|
95
118
|
"""Load the embedding model."""
|
|
96
119
|
if self._model is not None:
|
|
@@ -105,38 +128,127 @@ class LocalSTEmbeddings(Embeddings):
|
|
|
105
128
|
)
|
|
106
129
|
|
|
107
130
|
logger.info(f"Embeddings: initializing local provider with model {self.model_name}")
|
|
108
|
-
|
|
109
|
-
#
|
|
131
|
+
|
|
132
|
+
# Determine device based on hardware availability.
|
|
133
|
+
# We always set low_cpu_mem_usage=False to prevent lazy loading (meta tensors)
|
|
134
|
+
# which can cause issues when accelerate is installed but no GPU is available.
|
|
135
|
+
import torch
|
|
136
|
+
|
|
137
|
+
# Check for GPU (CUDA) or Apple Silicon (MPS)
|
|
138
|
+
has_gpu = torch.cuda.is_available() or (hasattr(torch.backends, "mps") and torch.backends.mps.is_available())
|
|
139
|
+
|
|
140
|
+
if has_gpu:
|
|
141
|
+
device = None # Let sentence-transformers auto-detect GPU/MPS
|
|
142
|
+
else:
|
|
143
|
+
device = "cpu"
|
|
144
|
+
|
|
110
145
|
self._model = SentenceTransformer(
|
|
111
146
|
self.model_name,
|
|
112
|
-
|
|
147
|
+
device=device,
|
|
148
|
+
model_kwargs={"low_cpu_mem_usage": False},
|
|
113
149
|
)
|
|
114
150
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
151
|
+
self._dimension = self._model.get_sentence_embedding_dimension()
|
|
152
|
+
logger.info(f"Embeddings: local provider initialized (dim: {self._dimension})")
|
|
153
|
+
|
|
154
|
+
def _is_xpc_error(self, error: Exception) -> bool:
|
|
155
|
+
"""
|
|
156
|
+
Check if an error is an XPC connection error (macOS daemon issue).
|
|
157
|
+
|
|
158
|
+
On macOS, long-running daemons can lose XPC connections to system services
|
|
159
|
+
when the process is idle for extended periods.
|
|
160
|
+
"""
|
|
161
|
+
error_str = str(error).lower()
|
|
162
|
+
return "xpc_error_connection_invalid" in error_str or "xpc error" in error_str
|
|
163
|
+
|
|
164
|
+
def _reinitialize_model_sync(self) -> None:
    """
    Clear and reload the embedding model synchronously.

    This is used to recover from XPC errors on macOS where the
    PyTorch/MPS backend loses its connection to system services.
    The steps mirror initialize(): pick the device, load the model with
    low_cpu_mem_usage=False, and refresh the cached dimension.

    Raises:
        ImportError: If sentence-transformers cannot be imported.
    """
    logger.warning(f"Reinitializing embedding model {self.model_name} due to backend error")

    # Drop the reference first so the old model can be collected before the
    # replacement is loaded.
    self._model = None

    import gc

    import torch

    gc.collect()

    # Probe the hardware once and reuse the answer for both cache clearing
    # and device selection.
    cuda_available = torch.cuda.is_available()
    mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()

    if cuda_available:
        torch.cuda.empty_cache()
    elif mps_available:
        try:
            torch.mps.empty_cache()
        except AttributeError:
            pass  # Method might not exist in all PyTorch versions

    try:
        from sentence_transformers import SentenceTransformer
    except ImportError as err:
        # Chain the original error so the real import failure stays visible.
        raise ImportError(
            "sentence-transformers is required for LocalSTEmbeddings. "
            "Install it with: pip install sentence-transformers"
        ) from err

    # None lets sentence-transformers auto-detect GPU/MPS; otherwise pin to CPU.
    device = None if (cuda_available or mps_available) else "cpu"

    self._model = SentenceTransformer(
        self.model_name,
        device=device,
        model_kwargs={"low_cpu_mem_usage": False},
    )
    # Keep the cached dimension in sync with the freshly loaded model,
    # the same way initialize() derives it.
    self._dimension = self._model.get_sentence_embedding_dimension()

    logger.info("Embeddings: local provider reinitialized successfully")
|
|
125
216
|
|
|
126
217
|
def encode(self, texts: list[str]) -> list[list[float]]:
    """
    Generate embeddings for a list of texts.

    Automatically recovers from XPC errors on macOS by reinitializing the
    model once and retrying.

    Args:
        texts: List of text strings to encode

    Returns:
        List of embedding vectors

    Raises:
        RuntimeError: If the model has not been loaded, or if reloading the
            model after an XPC error fails (chained to the reload failure).
    """
    if self._model is None:
        raise RuntimeError("Embeddings not initialized. Call initialize() first.")

    # Same short-circuit the remote providers use; avoids a pointless model call.
    if not texts:
        return []

    max_retries = 1
    for attempt in range(max_retries + 1):
        try:
            embeddings = self._model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
            return [emb.tolist() for emb in embeddings]
        except Exception as e:
            # Only XPC errors are retried, and only while retries remain.
            if not self._is_xpc_error(e) or attempt >= max_retries:
                raise

            logger.warning(f"XPC error detected in embedding generation (attempt {attempt + 1}): {e}")
            try:
                self._reinitialize_model_sync()
            except Exception as reinit_error:
                logger.error(f"Failed to reinitialize model: {reinit_error}")
                # RuntimeError is still an Exception for existing callers; the
                # explicit cause records why recovery failed.
                raise RuntimeError(f"Failed to recover from XPC error: {str(e)}") from reinit_error
            logger.info("Model reinitialized successfully, retrying embedding generation")
|
|
140
252
|
|
|
141
253
|
|
|
142
254
|
class RemoteTEIEmbeddings(Embeddings):
|
|
@@ -146,7 +258,7 @@ class RemoteTEIEmbeddings(Embeddings):
|
|
|
146
258
|
TEI provides a high-performance inference server for embedding models.
|
|
147
259
|
See: https://github.com/huggingface/text-embeddings-inference
|
|
148
260
|
|
|
149
|
-
The
|
|
261
|
+
The embedding dimension is auto-detected from the server at initialization.
|
|
150
262
|
"""
|
|
151
263
|
|
|
152
264
|
def __init__(
|
|
@@ -174,11 +286,18 @@ class RemoteTEIEmbeddings(Embeddings):
|
|
|
174
286
|
self.retry_delay = retry_delay
|
|
175
287
|
self._client: httpx.Client | None = None
|
|
176
288
|
self._model_id: str | None = None
|
|
289
|
+
self._dimension: int | None = None
|
|
177
290
|
|
|
178
291
|
@property
def provider_name(self) -> str:
    """Identify this backend as the remote TEI provider."""
    name = "tei"
    return name
|
|
181
294
|
|
|
295
|
+
@property
def dimension(self) -> int:
    """
    Return the embedding dimension cached on this instance.

    Raises:
        RuntimeError: If no dimension has been stored yet (``_dimension`` is None).
    """
    detected = self._dimension
    if detected is not None:
        return detected
    raise RuntimeError("Embeddings not initialized. Call initialize() first.")
|
|
300
|
+
|
|
182
301
|
def _request_with_retry(self, method: str, url: str, **kwargs) -> httpx.Response:
|
|
183
302
|
"""Make an HTTP request with automatic retries on transient errors."""
|
|
184
303
|
import time
|
|
@@ -229,7 +348,24 @@ class RemoteTEIEmbeddings(Embeddings):
|
|
|
229
348
|
response = self._request_with_retry("GET", f"{self.base_url}/info")
|
|
230
349
|
info = response.json()
|
|
231
350
|
self._model_id = info.get("model_id", "unknown")
|
|
232
|
-
|
|
351
|
+
|
|
352
|
+
# Get dimension from server info or by doing a test embedding
|
|
353
|
+
if "max_input_length" in info and "model_dtype" in info:
|
|
354
|
+
# Try to get dimension from info endpoint (some TEI versions expose it)
|
|
355
|
+
# If not available, do a test embedding
|
|
356
|
+
pass
|
|
357
|
+
|
|
358
|
+
# Do a test embedding to detect dimension
|
|
359
|
+
test_response = self._request_with_retry(
|
|
360
|
+
"POST",
|
|
361
|
+
f"{self.base_url}/embed",
|
|
362
|
+
json={"inputs": ["test"]},
|
|
363
|
+
)
|
|
364
|
+
test_embeddings = test_response.json()
|
|
365
|
+
if test_embeddings and len(test_embeddings) > 0:
|
|
366
|
+
self._dimension = len(test_embeddings[0])
|
|
367
|
+
|
|
368
|
+
logger.info(f"Embeddings: TEI provider initialized (model: {self._model_id}, dim: {self._dimension})")
|
|
233
369
|
except httpx.HTTPError as e:
|
|
234
370
|
raise RuntimeError(f"Failed to connect to TEI server at {self.base_url}: {e}")
|
|
235
371
|
|
|
@@ -269,6 +405,369 @@ class RemoteTEIEmbeddings(Embeddings):
|
|
|
269
405
|
return all_embeddings
|
|
270
406
|
|
|
271
407
|
|
|
408
|
+
class OpenAIEmbeddings(Embeddings):
    """
    OpenAI embeddings implementation using the OpenAI API.

    Supports text-embedding-3-small (1536 dims), text-embedding-3-large (3072 dims),
    and text-embedding-ada-002 (1536 dims, legacy).

    The embedding dimension is taken from MODEL_DIMENSIONS for known models and
    otherwise detected with a test embedding at initialization.
    """

    # Known dimensions for OpenAI embedding models
    MODEL_DIMENSIONS = {
        "text-embedding-3-small": 1536,
        "text-embedding-3-large": 3072,
        "text-embedding-ada-002": 1536,
    }

    def __init__(
        self,
        api_key: str,
        model: str = DEFAULT_EMBEDDINGS_OPENAI_MODEL,
        base_url: str | None = None,
        batch_size: int = 100,
        max_retries: int = 3,
    ):
        """
        Initialize OpenAI embeddings client.

        Args:
            api_key: OpenAI API key
            model: OpenAI embedding model name (default: text-embedding-3-small)
            base_url: Custom base URL for OpenAI-compatible API (e.g., Azure OpenAI endpoint)
            batch_size: Maximum batch size for embedding requests (default: 100)
            max_retries: Maximum number of retries for failed requests (default: 3)
        """
        self.api_key = api_key
        self.model = model
        self.base_url = base_url
        self.batch_size = batch_size
        self.max_retries = max_retries
        self._client = None
        self._dimension: int | None = None

    @property
    def provider_name(self) -> str:
        return "openai"

    @property
    def dimension(self) -> int:
        """Return the detected dimension; raises RuntimeError before initialize()."""
        if self._dimension is None:
            raise RuntimeError("Embeddings not initialized. Call initialize() first.")
        return self._dimension

    async def initialize(self) -> None:
        """
        Initialize the OpenAI client and detect dimension.

        The client and dimension are stored on the instance only after both
        are known, so a failed attempt can be retried by calling initialize()
        again.

        Raises:
            ImportError: If the openai package is not installed.
            RuntimeError: If the test embedding returns no data.
        """
        if self._client is not None:
            return

        try:
            from openai import OpenAI
        except ImportError as err:
            raise ImportError(
                "openai is required for OpenAIEmbeddings. Install it with: pip install openai"
            ) from err

        base_url_msg = f" at {self.base_url}" if self.base_url else ""
        logger.info(f"Embeddings: initializing OpenAI provider with model {self.model}{base_url_msg}")

        # Build client kwargs, only including base_url if set (for Azure or custom endpoints)
        client_kwargs = {"api_key": self.api_key, "max_retries": self.max_retries}
        if self.base_url:
            client_kwargs["base_url"] = self.base_url
        client = OpenAI(**client_kwargs)

        # Try to get dimension from known models, otherwise do a test embedding
        dimension = self.MODEL_DIMENSIONS.get(self.model)
        if dimension is None:
            response = client.embeddings.create(
                model=self.model,
                input=["test"],
            )
            if not response.data:
                raise RuntimeError(
                    f"OpenAI embeddings endpoint returned no data for model {self.model}; "
                    "cannot detect embedding dimension"
                )
            dimension = len(response.data[0].embedding)

        # Commit state last: if anything above raised, the instance still
        # reports "not initialized" and initialize() can be called again.
        self._client = client
        self._dimension = dimension

        logger.info(f"Embeddings: OpenAI provider initialized (model: {self.model}, dim: {self._dimension})")

    def encode(self, texts: list[str]) -> list[list[float]]:
        """
        Generate embeddings using the OpenAI API.

        Args:
            texts: List of text strings to encode

        Returns:
            List of embedding vectors

        Raises:
            RuntimeError: If initialize() has not completed.
        """
        if self._client is None:
            raise RuntimeError("Embeddings not initialized. Call initialize() first.")

        if not texts:
            return []

        all_embeddings = []

        # Process in batches
        for start in range(0, len(texts), self.batch_size):
            batch = texts[start : start + self.batch_size]

            response = self._client.embeddings.create(
                model=self.model,
                input=batch,
            )

            # Sort by index to ensure correct order
            ordered = sorted(response.data, key=lambda item: item.index)
            all_embeddings.extend(item.embedding for item in ordered)

        return all_embeddings
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
class CohereEmbeddings(Embeddings):
    """
    Cohere embeddings implementation using the Cohere API.

    Supports embed-english-v3.0 (1024 dims) and embed-multilingual-v3.0 (1024 dims).

    The embedding dimension is taken from MODEL_DIMENSIONS for known models and
    otherwise detected with a test embedding at initialization.
    """

    # Known dimensions for Cohere embedding models
    MODEL_DIMENSIONS = {
        "embed-english-v3.0": 1024,
        "embed-multilingual-v3.0": 1024,
        "embed-english-light-v3.0": 384,
        "embed-multilingual-light-v3.0": 384,
        "embed-english-v2.0": 4096,
        "embed-multilingual-v2.0": 768,
    }

    def __init__(
        self,
        api_key: str,
        model: str = DEFAULT_EMBEDDINGS_COHERE_MODEL,
        base_url: str | None = None,
        batch_size: int = 96,
        timeout: float = 60.0,
        input_type: str = "search_document",
    ):
        """
        Initialize Cohere embeddings client.

        Args:
            api_key: Cohere API key
            model: Cohere embedding model name (default: embed-english-v3.0)
            base_url: Custom base URL for Cohere-compatible API (e.g., Azure-hosted endpoint)
            batch_size: Maximum batch size for embedding requests (default: 96, Cohere's limit)
            timeout: Request timeout in seconds (default: 60.0)
            input_type: Input type for embeddings (default: search_document).
                Options: search_document, search_query, classification, clustering
        """
        self.api_key = api_key
        self.model = model
        self.base_url = base_url
        self.batch_size = batch_size
        self.timeout = timeout
        self.input_type = input_type
        self._client = None
        self._dimension: int | None = None

    @property
    def provider_name(self) -> str:
        return "cohere"

    @property
    def dimension(self) -> int:
        """Return the detected dimension; raises RuntimeError before initialize()."""
        if self._dimension is None:
            raise RuntimeError("Embeddings not initialized. Call initialize() first.")
        return self._dimension

    async def initialize(self) -> None:
        """
        Initialize the Cohere client and detect dimension.

        The client and dimension are stored on the instance only after both
        are known, so a failed attempt can be retried by calling initialize()
        again.

        Raises:
            ImportError: If the cohere package is not installed.
            RuntimeError: If the test embedding returns no embeddings.
        """
        if self._client is not None:
            return

        try:
            import cohere
        except ImportError as err:
            raise ImportError(
                "cohere is required for CohereEmbeddings. Install it with: pip install cohere"
            ) from err

        base_url_msg = f" at {self.base_url}" if self.base_url else ""
        logger.info(f"Embeddings: initializing Cohere provider with model {self.model}{base_url_msg}")

        # Build client kwargs, only including base_url if set (for Azure or custom endpoints)
        client_kwargs = {"api_key": self.api_key, "timeout": self.timeout}
        if self.base_url:
            client_kwargs["base_url"] = self.base_url
        client = cohere.Client(**client_kwargs)

        # Try to get dimension from known models, otherwise do a test embedding
        dimension = self.MODEL_DIMENSIONS.get(self.model)
        if dimension is None:
            response = client.embed(
                texts=["test"],
                model=self.model,
                input_type=self.input_type,
            )
            if not response.embeddings:
                raise RuntimeError(
                    f"Cohere embed endpoint returned no embeddings for model {self.model}; "
                    "cannot detect embedding dimension"
                )
            dimension = len(response.embeddings[0])

        # Commit state last: if anything above raised, the instance still
        # reports "not initialized" and initialize() can be called again.
        self._client = client
        self._dimension = dimension

        logger.info(f"Embeddings: Cohere provider initialized (model: {self.model}, dim: {self._dimension})")

    def encode(self, texts: list[str]) -> list[list[float]]:
        """
        Generate embeddings using the Cohere API.

        Every batch is sent with the input_type configured on the instance.

        Args:
            texts: List of text strings to encode

        Returns:
            List of embedding vectors

        Raises:
            RuntimeError: If initialize() has not completed.
        """
        if self._client is None:
            raise RuntimeError("Embeddings not initialized. Call initialize() first.")

        if not texts:
            return []

        all_embeddings = []

        # Process in batches
        for start in range(0, len(texts), self.batch_size):
            batch = texts[start : start + self.batch_size]

            response = self._client.embed(
                texts=batch,
                model=self.model,
                input_type=self.input_type,
            )

            all_embeddings.extend(response.embeddings)

        return all_embeddings
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
class LiteLLMEmbeddings(Embeddings):
    """
    LiteLLM embeddings implementation using LiteLLM proxy's /embeddings endpoint.

    LiteLLM provides a unified interface for multiple embedding providers.
    The proxy exposes an OpenAI-compatible /embeddings endpoint.
    See: https://docs.litellm.ai/docs/embedding/supported_embedding

    Supported providers via LiteLLM:
    - OpenAI (text-embedding-3-small, text-embedding-ada-002, etc.)
    - Cohere (embed-english-v3.0, etc.) - prefix with cohere/
    - Vertex AI (textembedding-gecko, etc.) - prefix with vertex_ai/
    - HuggingFace, Mistral, Voyage AI, etc.

    The embedding dimension is detected with a test embedding at initialization.
    """

    def __init__(
        self,
        api_base: str = DEFAULT_LITELLM_API_BASE,
        api_key: str | None = None,
        model: str = DEFAULT_EMBEDDINGS_LITELLM_MODEL,
        batch_size: int = 100,
        timeout: float = 60.0,
    ):
        """
        Initialize LiteLLM embeddings client.

        Args:
            api_base: Base URL of the LiteLLM proxy (default: http://localhost:4000)
            api_key: API key for the LiteLLM proxy (optional, depends on proxy config)
            model: Embedding model name (default: text-embedding-3-small)
                Use provider prefix for non-OpenAI models (e.g., cohere/embed-english-v3.0)
            batch_size: Maximum batch size for embedding requests (default: 100)
            timeout: Request timeout in seconds (default: 60.0)
        """
        # Trailing slashes are stripped so f"{api_base}/embeddings" never doubles them.
        self.api_base = api_base.rstrip("/")
        self.api_key = api_key
        self.model = model
        self.batch_size = batch_size
        self.timeout = timeout
        self._client: httpx.Client | None = None
        self._dimension: int | None = None

    @property
    def provider_name(self) -> str:
        return "litellm"

    @property
    def dimension(self) -> int:
        """Return the detected dimension; raises RuntimeError before initialize()."""
        if self._dimension is None:
            raise RuntimeError("Embeddings not initialized. Call initialize() first.")
        return self._dimension

    def _probe_dimension(self, client: httpx.Client) -> int:
        """Embed a single test string through *client* and return the vector length."""
        try:
            response = client.post(
                f"{self.api_base}/embeddings",
                json={"model": self.model, "input": ["test"]},
            )
            response.raise_for_status()
        except httpx.HTTPError as e:
            raise RuntimeError(f"Failed to connect to LiteLLM proxy at {self.api_base}: {e}") from e

        data = response.json().get("data")
        if not data:
            raise RuntimeError(
                f"LiteLLM proxy at {self.api_base} returned no embedding data for model {self.model}; "
                "cannot detect embedding dimension"
            )
        return len(data[0]["embedding"])

    async def initialize(self) -> None:
        """
        Initialize the HTTP client and detect embedding dimension.

        The client is stored on the instance only after the dimension probe
        succeeds. If the probe fails the client is closed and the instance
        stays uninitialized, so initialize() can be called again.

        Raises:
            RuntimeError: If the proxy request fails or returns no embedding data.
        """
        if self._client is not None:
            return

        logger.info(f"Embeddings: initializing LiteLLM provider at {self.api_base} with model {self.model}")

        headers = {"Content-Type": "application/json"}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"

        client = httpx.Client(timeout=self.timeout, headers=headers)

        # Do a test embedding to detect dimension
        try:
            dimension = self._probe_dimension(client)
        except BaseException:
            # Do not leak the connection pool of a client that is never stored.
            client.close()
            raise

        self._client = client
        self._dimension = dimension
        logger.info(f"Embeddings: LiteLLM provider initialized (model: {self.model}, dim: {self._dimension})")

    def encode(self, texts: list[str]) -> list[list[float]]:
        """
        Generate embeddings using the LiteLLM proxy.

        Args:
            texts: List of text strings to encode

        Returns:
            List of embedding vectors

        Raises:
            RuntimeError: If initialize() has not completed.
            httpx.HTTPStatusError: If the proxy answers a batch with an error status.
        """
        if self._client is None:
            raise RuntimeError("Embeddings not initialized. Call initialize() first.")

        if not texts:
            return []

        all_embeddings = []

        # Process in batches
        for start in range(0, len(texts), self.batch_size):
            batch = texts[start : start + self.batch_size]

            response = self._client.post(
                f"{self.api_base}/embeddings",
                json={"model": self.model, "input": batch},
            )
            response.raise_for_status()
            result = response.json()

            # Sort by index to ensure correct order
            ordered = sorted(result["data"], key=lambda item: item["index"])
            all_embeddings.extend(item["embedding"] for item in ordered)

        return all_embeddings
|
|
769
|
+
|
|
770
|
+
|
|
272
771
|
def create_embeddings_from_env() -> Embeddings:
|
|
273
772
|
"""
|
|
274
773
|
Create an Embeddings instance based on environment variables.
|
|
@@ -289,5 +788,30 @@ def create_embeddings_from_env() -> Embeddings:
|
|
|
289
788
|
model = os.environ.get(ENV_EMBEDDINGS_LOCAL_MODEL)
|
|
290
789
|
model_name = model or DEFAULT_EMBEDDINGS_LOCAL_MODEL
|
|
291
790
|
return LocalSTEmbeddings(model_name=model_name)
|
|
791
|
+
elif provider == "openai":
|
|
792
|
+
# Use dedicated embeddings API key, or fall back to LLM API key
|
|
793
|
+
api_key = os.environ.get(ENV_EMBEDDINGS_OPENAI_API_KEY) or os.environ.get(ENV_LLM_API_KEY)
|
|
794
|
+
if not api_key:
|
|
795
|
+
raise ValueError(
|
|
796
|
+
f"{ENV_EMBEDDINGS_OPENAI_API_KEY} or {ENV_LLM_API_KEY} is required "
|
|
797
|
+
f"when {ENV_EMBEDDINGS_PROVIDER} is 'openai'"
|
|
798
|
+
)
|
|
799
|
+
model = os.environ.get(ENV_EMBEDDINGS_OPENAI_MODEL, DEFAULT_EMBEDDINGS_OPENAI_MODEL)
|
|
800
|
+
base_url = os.environ.get(ENV_EMBEDDINGS_OPENAI_BASE_URL) or None
|
|
801
|
+
return OpenAIEmbeddings(api_key=api_key, model=model, base_url=base_url)
|
|
802
|
+
elif provider == "cohere":
|
|
803
|
+
api_key = os.environ.get(ENV_COHERE_API_KEY)
|
|
804
|
+
if not api_key:
|
|
805
|
+
raise ValueError(f"{ENV_COHERE_API_KEY} is required when {ENV_EMBEDDINGS_PROVIDER} is 'cohere'")
|
|
806
|
+
model = os.environ.get(ENV_EMBEDDINGS_COHERE_MODEL, DEFAULT_EMBEDDINGS_COHERE_MODEL)
|
|
807
|
+
base_url = os.environ.get(ENV_EMBEDDINGS_COHERE_BASE_URL) or None
|
|
808
|
+
return CohereEmbeddings(api_key=api_key, model=model, base_url=base_url)
|
|
809
|
+
elif provider == "litellm":
|
|
810
|
+
api_base = os.environ.get(ENV_LITELLM_API_BASE, DEFAULT_LITELLM_API_BASE)
|
|
811
|
+
api_key = os.environ.get(ENV_LITELLM_API_KEY)
|
|
812
|
+
model = os.environ.get(ENV_EMBEDDINGS_LITELLM_MODEL, DEFAULT_EMBEDDINGS_LITELLM_MODEL)
|
|
813
|
+
return LiteLLMEmbeddings(api_base=api_base, api_key=api_key, model=model)
|
|
292
814
|
else:
|
|
293
|
-
raise ValueError(
|
|
815
|
+
raise ValueError(
|
|
816
|
+
f"Unknown embeddings provider: {provider}. Supported: 'local', 'tei', 'openai', 'cohere', 'litellm'"
|
|
817
|
+
)
|