hindsight-api 0.0.21__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/__init__.py +10 -2
- hindsight_api/alembic/README +1 -0
- hindsight_api/alembic/env.py +146 -0
- hindsight_api/alembic/script.py.mako +28 -0
- hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +274 -0
- hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +70 -0
- hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +39 -0
- hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +48 -0
- hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +62 -0
- hindsight_api/alembic/versions/rename_personality_to_disposition.py +65 -0
- hindsight_api/api/__init__.py +2 -4
- hindsight_api/api/http.py +112 -164
- hindsight_api/api/mcp.py +2 -1
- hindsight_api/config.py +154 -0
- hindsight_api/engine/__init__.py +7 -2
- hindsight_api/engine/cross_encoder.py +225 -16
- hindsight_api/engine/embeddings.py +198 -19
- hindsight_api/engine/entity_resolver.py +56 -29
- hindsight_api/engine/llm_wrapper.py +147 -106
- hindsight_api/engine/memory_engine.py +337 -192
- hindsight_api/engine/response_models.py +15 -17
- hindsight_api/engine/retain/bank_utils.py +25 -35
- hindsight_api/engine/retain/entity_processing.py +5 -5
- hindsight_api/engine/retain/fact_extraction.py +86 -24
- hindsight_api/engine/retain/fact_storage.py +1 -1
- hindsight_api/engine/retain/link_creation.py +12 -6
- hindsight_api/engine/retain/link_utils.py +50 -56
- hindsight_api/engine/retain/observation_regeneration.py +264 -0
- hindsight_api/engine/retain/orchestrator.py +31 -44
- hindsight_api/engine/retain/types.py +14 -0
- hindsight_api/engine/search/reranking.py +6 -10
- hindsight_api/engine/search/retrieval.py +2 -2
- hindsight_api/engine/search/think_utils.py +59 -30
- hindsight_api/engine/search/tracer.py +1 -1
- hindsight_api/main.py +201 -0
- hindsight_api/migrations.py +61 -39
- hindsight_api/models.py +1 -2
- hindsight_api/pg0.py +17 -36
- hindsight_api/server.py +43 -0
- {hindsight_api-0.0.21.dist-info → hindsight_api-0.1.1.dist-info}/METADATA +2 -3
- hindsight_api-0.1.1.dist-info/RECORD +60 -0
- hindsight_api-0.1.1.dist-info/entry_points.txt +2 -0
- hindsight_api/cli.py +0 -128
- hindsight_api/web/__init__.py +0 -12
- hindsight_api/web/server.py +0 -109
- hindsight_api-0.0.21.dist-info/RECORD +0 -50
- hindsight_api-0.0.21.dist-info/entry_points.txt +0 -2
- {hindsight_api-0.0.21.dist-info → hindsight_api-0.1.1.dist-info}/WHEEL +0 -0
|
@@ -5,15 +5,26 @@ Provides an interface for generating embeddings with different backends.
|
|
|
5
5
|
|
|
6
6
|
IMPORTANT: All embeddings must produce 384-dimensional vectors to match
|
|
7
7
|
the database schema (pgvector column defined as vector(384)).
|
|
8
|
+
|
|
9
|
+
Configuration via environment variables - see hindsight_api.config for all env var names.
|
|
8
10
|
"""
|
|
9
11
|
from abc import ABC, abstractmethod
|
|
10
|
-
from typing import List
|
|
12
|
+
from typing import List, Optional
|
|
11
13
|
import logging
|
|
14
|
+
import os
|
|
12
15
|
|
|
13
|
-
|
|
16
|
+
import httpx
|
|
14
17
|
|
|
15
|
-
|
|
16
|
-
|
|
18
|
+
from ..config import (
|
|
19
|
+
ENV_EMBEDDINGS_PROVIDER,
|
|
20
|
+
ENV_EMBEDDINGS_LOCAL_MODEL,
|
|
21
|
+
ENV_EMBEDDINGS_TEI_URL,
|
|
22
|
+
DEFAULT_EMBEDDINGS_PROVIDER,
|
|
23
|
+
DEFAULT_EMBEDDINGS_LOCAL_MODEL,
|
|
24
|
+
EMBEDDING_DIMENSION,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
17
28
|
|
|
18
29
|
|
|
19
30
|
class Embeddings(ABC):
|
|
@@ -24,12 +35,18 @@ class Embeddings(ABC):
|
|
|
24
35
|
the database schema.
|
|
25
36
|
"""
|
|
26
37
|
|
|
38
|
+
@property
|
|
39
|
+
@abstractmethod
|
|
40
|
+
def provider_name(self) -> str:
|
|
41
|
+
"""Return a human-readable name for this provider (e.g., 'local', 'tei')."""
|
|
42
|
+
pass
|
|
43
|
+
|
|
27
44
|
@abstractmethod
|
|
28
|
-
def
|
|
45
|
+
async def initialize(self) -> None:
|
|
29
46
|
"""
|
|
30
|
-
|
|
47
|
+
Initialize the embedding model asynchronously.
|
|
31
48
|
|
|
32
|
-
This should be called during
|
|
49
|
+
This should be called during startup to load/connect to the model
|
|
33
50
|
and avoid cold start latency on first encode() call.
|
|
34
51
|
"""
|
|
35
52
|
pass
|
|
@@ -48,29 +65,33 @@ class Embeddings(ABC):
|
|
|
48
65
|
pass
|
|
49
66
|
|
|
50
67
|
|
|
51
|
-
class
|
|
68
|
+
class LocalSTEmbeddings(Embeddings):
|
|
52
69
|
"""
|
|
53
|
-
|
|
70
|
+
Local embeddings implementation using SentenceTransformers.
|
|
54
71
|
|
|
55
|
-
Call
|
|
72
|
+
Call initialize() during startup to load the model and avoid cold starts.
|
|
56
73
|
|
|
57
74
|
Default model is BAAI/bge-small-en-v1.5 which produces 384-dimensional
|
|
58
75
|
embeddings matching the database schema.
|
|
59
76
|
"""
|
|
60
77
|
|
|
61
|
-
def __init__(self, model_name: str =
|
|
78
|
+
def __init__(self, model_name: Optional[str] = None):
|
|
62
79
|
"""
|
|
63
|
-
Initialize SentenceTransformers embeddings.
|
|
80
|
+
Initialize local SentenceTransformers embeddings.
|
|
64
81
|
|
|
65
82
|
Args:
|
|
66
83
|
model_name: Name of the SentenceTransformer model to use.
|
|
67
84
|
Must produce 384-dimensional embeddings.
|
|
68
85
|
Default: BAAI/bge-small-en-v1.5
|
|
69
86
|
"""
|
|
70
|
-
self.model_name = model_name
|
|
87
|
+
self.model_name = model_name or DEFAULT_EMBEDDINGS_LOCAL_MODEL
|
|
71
88
|
self._model = None
|
|
72
89
|
|
|
73
|
-
|
|
90
|
+
@property
|
|
91
|
+
def provider_name(self) -> str:
|
|
92
|
+
return "local"
|
|
93
|
+
|
|
94
|
+
async def initialize(self) -> None:
|
|
74
95
|
"""Load the embedding model."""
|
|
75
96
|
if self._model is not None:
|
|
76
97
|
return
|
|
@@ -79,12 +100,17 @@ class SentenceTransformersEmbeddings(Embeddings):
|
|
|
79
100
|
from sentence_transformers import SentenceTransformer
|
|
80
101
|
except ImportError:
|
|
81
102
|
raise ImportError(
|
|
82
|
-
"sentence-transformers is required for
|
|
103
|
+
"sentence-transformers is required for LocalSTEmbeddings. "
|
|
83
104
|
"Install it with: pip install sentence-transformers"
|
|
84
105
|
)
|
|
85
106
|
|
|
86
|
-
logger.info(f"
|
|
87
|
-
|
|
107
|
+
logger.info(f"Embeddings: initializing local provider with model {self.model_name}")
|
|
108
|
+
# Disable lazy loading (meta tensors) which causes issues with newer transformers/accelerate
|
|
109
|
+
# Setting low_cpu_mem_usage=False and device_map=None ensures tensors are fully materialized
|
|
110
|
+
self._model = SentenceTransformer(
|
|
111
|
+
self.model_name,
|
|
112
|
+
model_kwargs={"low_cpu_mem_usage": False, "device_map": None},
|
|
113
|
+
)
|
|
88
114
|
|
|
89
115
|
# Validate dimension matches database schema
|
|
90
116
|
model_dim = self._model.get_sentence_embedding_dimension()
|
|
@@ -95,7 +121,7 @@ class SentenceTransformersEmbeddings(Embeddings):
|
|
|
95
121
|
f"Use a model that produces {EMBEDDING_DIMENSION}-dimensional embeddings."
|
|
96
122
|
)
|
|
97
123
|
|
|
98
|
-
logger.info(f"
|
|
124
|
+
logger.info(f"Embeddings: local provider initialized (dim: {model_dim})")
|
|
99
125
|
|
|
100
126
|
def encode(self, texts: List[str]) -> List[List[float]]:
|
|
101
127
|
"""
|
|
@@ -108,6 +134,159 @@ class SentenceTransformersEmbeddings(Embeddings):
|
|
|
108
134
|
List of 384-dimensional embedding vectors
|
|
109
135
|
"""
|
|
110
136
|
if self._model is None:
|
|
111
|
-
|
|
137
|
+
raise RuntimeError("Embeddings not initialized. Call initialize() first.")
|
|
112
138
|
embeddings = self._model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
|
|
113
139
|
return [emb.tolist() for emb in embeddings]
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class RemoteTEIEmbeddings(Embeddings):
    """
    Remote embeddings implementation using HuggingFace Text Embeddings Inference (TEI) HTTP API.

    TEI provides a high-performance inference server for embedding models.
    See: https://github.com/huggingface/text-embeddings-inference

    The server should be running a model that produces 384-dimensional embeddings.
    Vectors returned by the server are checked against EMBEDDING_DIMENSION in
    encode() so that a misconfigured server is reported at the embedding call
    rather than later at the database layer.
    """

    def __init__(
        self,
        base_url: str,
        timeout: float = 30.0,
        batch_size: int = 32,
        max_retries: int = 3,
        retry_delay: float = 0.5,
    ):
        """
        Initialize remote TEI embeddings client.

        No network traffic happens here; call initialize() to connect.

        Args:
            base_url: Base URL of the TEI server (e.g., "http://localhost:8080")
            timeout: Request timeout in seconds (default: 30.0)
            batch_size: Maximum batch size for embedding requests (default: 32)
            max_retries: Maximum number of retries for failed requests (default: 3)
            retry_delay: Initial delay between retries in seconds, doubles each retry (default: 0.5)

        Raises:
            ValueError: If batch_size is smaller than 1 or max_retries is negative.
        """
        # A non-positive batch size would make encode() either crash inside
        # range() or silently return no embeddings at all.
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        # A negative retry count would skip the request loop entirely.
        if max_retries < 0:
            raise ValueError(f"max_retries must be >= 0, got {max_retries}")

        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.batch_size = batch_size
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        self._client: Optional[httpx.Client] = None
        self._model_id: Optional[str] = None

    @property
    def provider_name(self) -> str:
        """Return the short provider identifier for this backend."""
        return "tei"

    def _discard_client(self) -> None:
        """Close and forget the HTTP client so initialize() can be attempted again."""
        client, self._client = self._client, None
        if client is not None:
            client.close()

    def _request_with_retry(self, method: str, url: str, **kwargs) -> httpx.Response:
        """
        Make an HTTP request with automatic retries on transient errors.

        Connection errors, any httpx timeout, and 5xx responses are retried up
        to max_retries times with exponential backoff. 4xx responses are raised
        immediately.

        Args:
            method: "GET" issues a GET request; any other value issues a POST.
            url: Absolute URL to request.
            **kwargs: Passed through to the httpx client call.

        Returns:
            The successful httpx.Response.

        Raises:
            httpx.HTTPError: The last error once all attempts are exhausted, or
                a non-retryable HTTP status error.
        """
        import time

        send = self._client.get if method == "GET" else self._client.post
        total_attempts = self.max_retries + 1
        delay = self.retry_delay
        last_error = None

        for attempt in range(total_attempts):
            try:
                response = send(url, **kwargs)
                response.raise_for_status()
                return response
            except (httpx.ConnectError, httpx.TimeoutException) as e:
                # TimeoutException covers connect/read/write/pool timeouts.
                last_error = e
                failure = "request failed"
            except httpx.HTTPStatusError as e:
                # Only 5xx server errors are considered transient.
                if e.response.status_code < 500:
                    raise
                last_error = e
                failure = "server error"

            if attempt < self.max_retries:
                logger.warning(
                    f"TEI {failure} (attempt {attempt + 1}/{total_attempts}): {last_error}. "
                    f"Retrying in {delay}s..."
                )
                time.sleep(delay)
                delay *= 2  # Exponential backoff

        raise last_error

    async def initialize(self) -> None:
        """
        Initialize the HTTP client and verify server connectivity.

        Idempotent once it has succeeded. If the connectivity check fails the
        client is closed and cleared, so a later call retries from scratch and
        encode() keeps reporting "not initialized".

        NOTE(review): the probe uses the synchronous httpx client (and
        time.sleep between retries), so it blocks the event loop while it runs.

        Raises:
            RuntimeError: If the TEI server cannot be reached.
        """
        if self._client is not None:
            return

        logger.info(f"Embeddings: initializing TEI provider at {self.base_url}")
        self._client = httpx.Client(timeout=self.timeout)

        # Verify server is reachable and get model info
        try:
            response = self._request_with_retry("GET", f"{self.base_url}/info")
            info = response.json()
            self._model_id = info.get("model_id", "unknown")
        except httpx.HTTPError as e:
            self._discard_client()
            raise RuntimeError(f"Failed to connect to TEI server at {self.base_url}: {e}") from e
        except Exception:
            # e.g. a non-JSON /info payload: do not keep a half-initialized client.
            self._discard_client()
            raise

        logger.info(f"Embeddings: TEI provider initialized (model: {self._model_id})")

    def encode(self, texts: List[str]) -> List[List[float]]:
        """
        Generate embeddings using the remote TEI server.

        Texts are sent to the server's /embed endpoint in chunks of at most
        batch_size items, and the results are concatenated in input order.

        Args:
            texts: List of text strings to encode

        Returns:
            List of embedding vectors, one per input text

        Raises:
            RuntimeError: If initialize() has not been called, if a request
                fails, or if the server returns the wrong number of vectors or
                vectors whose length is not EMBEDDING_DIMENSION.
        """
        if self._client is None:
            raise RuntimeError("Embeddings not initialized. Call initialize() first.")

        if not texts:
            return []

        all_embeddings = []

        # Process in batches
        for start in range(0, len(texts), self.batch_size):
            batch = texts[start:start + self.batch_size]

            try:
                response = self._request_with_retry(
                    "POST",
                    f"{self.base_url}/embed",
                    json={"inputs": batch},
                )
                batch_embeddings = response.json()
            except httpx.HTTPError as e:
                raise RuntimeError(f"TEI embedding request failed: {e}") from e

            # Each input must map to exactly one vector, otherwise results
            # would be misaligned with the caller's texts.
            if not isinstance(batch_embeddings, list) or len(batch_embeddings) != len(batch):
                raise RuntimeError(
                    f"TEI embedding response malformed: expected {len(batch)} vectors"
                )

            # Mirror the local provider's check against the database schema.
            for vector in batch_embeddings:
                if len(vector) != EMBEDDING_DIMENSION:
                    raise RuntimeError(
                        f"TEI server returned {len(vector)}-dimensional embeddings, "
                        f"expected {EMBEDDING_DIMENSION}"
                    )

            all_embeddings.extend(batch_embeddings)

        return all_embeddings
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def create_embeddings_from_env() -> Embeddings:
    """
    Build the Embeddings backend selected by the process environment.

    The provider name is read from the ENV_EMBEDDINGS_PROVIDER variable
    (falling back to DEFAULT_EMBEDDINGS_PROVIDER) and compared
    case-insensitively. See hindsight_api.config for environment variable
    names and defaults.

    Returns:
        A LocalSTEmbeddings for "local", or a RemoteTEIEmbeddings for "tei".
        The instance is not initialized yet.

    Raises:
        ValueError: If the provider is unknown, or if "tei" is selected
            without a TEI URL in the environment.
    """
    provider = os.environ.get(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER).lower()

    if provider == "local":
        configured_model = os.environ.get(ENV_EMBEDDINGS_LOCAL_MODEL)
        # Unset or empty model variable means "use the default model".
        chosen_model = configured_model if configured_model else DEFAULT_EMBEDDINGS_LOCAL_MODEL
        return LocalSTEmbeddings(model_name=chosen_model)

    if provider == "tei":
        tei_url = os.environ.get(ENV_EMBEDDINGS_TEI_URL)
        if tei_url:
            return RemoteTEIEmbeddings(base_url=tei_url)
        raise ValueError(
            f"{ENV_EMBEDDINGS_TEI_URL} is required when {ENV_EMBEDDINGS_PROVIDER} is 'tei'"
        )

    raise ValueError(
        f"Unknown embeddings provider: {provider}. Supported: 'local', 'tei'"
    )
|
|
@@ -126,18 +126,20 @@ class EntityResolver:
|
|
|
126
126
|
|
|
127
127
|
# Resolve each entity using pre-fetched candidates
|
|
128
128
|
entity_ids = [None] * len(entities_data)
|
|
129
|
-
entities_to_update = [] # (entity_id,
|
|
130
|
-
entities_to_create = [] # (idx, entity_data)
|
|
129
|
+
entities_to_update = [] # (entity_id, event_date)
|
|
130
|
+
entities_to_create = [] # (idx, entity_data, event_date)
|
|
131
131
|
|
|
132
132
|
for idx, entity_data in enumerate(entities_data):
|
|
133
133
|
entity_text = entity_data['text']
|
|
134
134
|
nearby_entities = entity_data.get('nearby_entities', [])
|
|
135
|
+
# Use per-entity date if available, otherwise fall back to batch-level date
|
|
136
|
+
entity_event_date = entity_data.get('event_date', unit_event_date)
|
|
135
137
|
|
|
136
138
|
candidates = all_candidates.get(entity_text, [])
|
|
137
139
|
|
|
138
140
|
if not candidates:
|
|
139
141
|
# Will create new entity
|
|
140
|
-
entities_to_create.append((idx, entity_data))
|
|
142
|
+
entities_to_create.append((idx, entity_data, entity_event_date))
|
|
141
143
|
continue
|
|
142
144
|
|
|
143
145
|
# Score candidates
|
|
@@ -165,9 +167,9 @@ class EntityResolver:
|
|
|
165
167
|
score += co_entity_score * 0.3
|
|
166
168
|
|
|
167
169
|
# 3. Temporal proximity (0-0.2)
|
|
168
|
-
if last_seen:
|
|
170
|
+
if last_seen and entity_event_date:
|
|
169
171
|
# Normalize timezone awareness for comparison
|
|
170
|
-
event_date_utc =
|
|
172
|
+
event_date_utc = entity_event_date if entity_event_date.tzinfo else entity_event_date.replace(tzinfo=timezone.utc)
|
|
171
173
|
last_seen_utc = last_seen if last_seen.tzinfo else last_seen.replace(tzinfo=timezone.utc)
|
|
172
174
|
days_diff = abs((event_date_utc - last_seen_utc).total_seconds() / 86400)
|
|
173
175
|
if days_diff < 7:
|
|
@@ -183,9 +185,9 @@ class EntityResolver:
|
|
|
183
185
|
|
|
184
186
|
if best_score > threshold:
|
|
185
187
|
entity_ids[idx] = best_candidate
|
|
186
|
-
entities_to_update.append((best_candidate,
|
|
188
|
+
entities_to_update.append((best_candidate, entity_event_date))
|
|
187
189
|
else:
|
|
188
|
-
entities_to_create.append((idx, entity_data))
|
|
190
|
+
entities_to_create.append((idx, entity_data, entity_event_date))
|
|
189
191
|
|
|
190
192
|
# Batch update existing entities
|
|
191
193
|
if entities_to_update:
|
|
@@ -199,29 +201,54 @@ class EntityResolver:
|
|
|
199
201
|
entities_to_update
|
|
200
202
|
)
|
|
201
203
|
|
|
202
|
-
#
|
|
203
|
-
# This
|
|
204
|
-
# only one succeeds and the other gets the existing ID
|
|
204
|
+
# Batch create new entities using COPY + INSERT for maximum speed
|
|
205
|
+
# This handles duplicates via ON CONFLICT and returns all IDs
|
|
205
206
|
if entities_to_create:
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
207
|
+
# Group entities by canonical name (lowercase) to handle duplicates within batch
|
|
208
|
+
# For duplicates, we only insert once and reuse the ID
|
|
209
|
+
unique_entities = {} # lowercase_name -> (entity_data, event_date, [indices])
|
|
210
|
+
for idx, entity_data, event_date in entities_to_create:
|
|
211
|
+
name_lower = entity_data['text'].lower()
|
|
212
|
+
if name_lower not in unique_entities:
|
|
213
|
+
unique_entities[name_lower] = (entity_data, event_date, [idx])
|
|
214
|
+
else:
|
|
215
|
+
# Same entity appears multiple times - add index to list
|
|
216
|
+
unique_entities[name_lower][2].append(idx)
|
|
217
|
+
|
|
218
|
+
# Batch insert unique entities and get their IDs
|
|
219
|
+
# Use a single query with unnest for speed
|
|
220
|
+
entity_names = []
|
|
221
|
+
entity_dates = []
|
|
222
|
+
indices_map = [] # Maps result index -> list of original indices
|
|
223
|
+
|
|
224
|
+
for name_lower, (entity_data, event_date, indices) in unique_entities.items():
|
|
225
|
+
entity_names.append(entity_data['text'])
|
|
226
|
+
entity_dates.append(event_date)
|
|
227
|
+
indices_map.append(indices)
|
|
228
|
+
|
|
229
|
+
# Batch INSERT ... ON CONFLICT with RETURNING
|
|
230
|
+
# This is much faster than individual inserts
|
|
231
|
+
rows = await conn.fetch(
|
|
232
|
+
"""
|
|
233
|
+
INSERT INTO entities (bank_id, canonical_name, first_seen, last_seen, mention_count)
|
|
234
|
+
SELECT $1, name, event_date, event_date, 1
|
|
235
|
+
FROM unnest($2::text[], $3::timestamptz[]) AS t(name, event_date)
|
|
236
|
+
ON CONFLICT (bank_id, LOWER(canonical_name))
|
|
237
|
+
DO UPDATE SET
|
|
238
|
+
mention_count = entities.mention_count + 1,
|
|
239
|
+
last_seen = EXCLUDED.last_seen
|
|
240
|
+
RETURNING id
|
|
241
|
+
""",
|
|
242
|
+
bank_id,
|
|
243
|
+
entity_names,
|
|
244
|
+
entity_dates
|
|
245
|
+
)
|
|
246
|
+
|
|
247
|
+
# Map returned IDs back to original indices
|
|
248
|
+
for result_idx, row in enumerate(rows):
|
|
249
|
+
entity_id = row['id']
|
|
250
|
+
for original_idx in indices_map[result_idx]:
|
|
251
|
+
entity_ids[original_idx] = entity_id
|
|
225
252
|
|
|
226
253
|
return entity_ids
|
|
227
254
|
|