hindsight-api 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- hindsight_api/__init__.py +10 -9
- hindsight_api/alembic/env.py +5 -8
- hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +266 -180
- hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +32 -32
- hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +11 -11
- hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +7 -12
- hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +23 -15
- hindsight_api/alembic/versions/rename_personality_to_disposition.py +30 -21
- hindsight_api/api/__init__.py +10 -10
- hindsight_api/api/http.py +575 -593
- hindsight_api/api/mcp.py +31 -33
- hindsight_api/banner.py +13 -6
- hindsight_api/config.py +17 -12
- hindsight_api/engine/__init__.py +9 -9
- hindsight_api/engine/cross_encoder.py +23 -27
- hindsight_api/engine/db_utils.py +5 -4
- hindsight_api/engine/embeddings.py +22 -21
- hindsight_api/engine/entity_resolver.py +81 -75
- hindsight_api/engine/llm_wrapper.py +74 -88
- hindsight_api/engine/memory_engine.py +663 -673
- hindsight_api/engine/query_analyzer.py +100 -97
- hindsight_api/engine/response_models.py +105 -106
- hindsight_api/engine/retain/__init__.py +9 -16
- hindsight_api/engine/retain/bank_utils.py +34 -58
- hindsight_api/engine/retain/chunk_storage.py +4 -12
- hindsight_api/engine/retain/deduplication.py +9 -28
- hindsight_api/engine/retain/embedding_processing.py +4 -11
- hindsight_api/engine/retain/embedding_utils.py +3 -4
- hindsight_api/engine/retain/entity_processing.py +7 -17
- hindsight_api/engine/retain/fact_extraction.py +155 -165
- hindsight_api/engine/retain/fact_storage.py +11 -23
- hindsight_api/engine/retain/link_creation.py +11 -39
- hindsight_api/engine/retain/link_utils.py +166 -95
- hindsight_api/engine/retain/observation_regeneration.py +39 -52
- hindsight_api/engine/retain/orchestrator.py +72 -62
- hindsight_api/engine/retain/types.py +49 -43
- hindsight_api/engine/search/__init__.py +15 -1
- hindsight_api/engine/search/fusion.py +6 -15
- hindsight_api/engine/search/graph_retrieval.py +234 -0
- hindsight_api/engine/search/mpfp_retrieval.py +438 -0
- hindsight_api/engine/search/observation_utils.py +9 -16
- hindsight_api/engine/search/reranking.py +4 -7
- hindsight_api/engine/search/retrieval.py +388 -193
- hindsight_api/engine/search/scoring.py +5 -7
- hindsight_api/engine/search/temporal_extraction.py +8 -11
- hindsight_api/engine/search/think_utils.py +115 -39
- hindsight_api/engine/search/trace.py +68 -38
- hindsight_api/engine/search/tracer.py +49 -35
- hindsight_api/engine/search/types.py +22 -16
- hindsight_api/engine/task_backend.py +21 -26
- hindsight_api/engine/utils.py +25 -10
- hindsight_api/main.py +21 -40
- hindsight_api/mcp_local.py +190 -0
- hindsight_api/metrics.py +44 -30
- hindsight_api/migrations.py +10 -8
- hindsight_api/models.py +60 -72
- hindsight_api/pg0.py +64 -337
- hindsight_api/server.py +3 -6
- {hindsight_api-0.1.4.dist-info → hindsight_api-0.1.6.dist-info}/METADATA +6 -5
- hindsight_api-0.1.6.dist-info/RECORD +64 -0
- {hindsight_api-0.1.4.dist-info → hindsight_api-0.1.6.dist-info}/entry_points.txt +1 -0
- hindsight_api-0.1.4.dist-info/RECORD +0 -61
- {hindsight_api-0.1.4.dist-info → hindsight_api-0.1.6.dist-info}/WHEEL +0 -0
|
@@ -4,12 +4,13 @@ Entity extraction and resolution for memory system.
|
|
|
4
4
|
Uses spaCy for entity extraction and implements resolution logic
|
|
5
5
|
to disambiguate entities across memory units.
|
|
6
6
|
"""
|
|
7
|
-
|
|
8
|
-
from
|
|
7
|
+
|
|
8
|
+
from datetime import UTC, datetime
|
|
9
9
|
from difflib import SequenceMatcher
|
|
10
|
-
from datetime import datetime, timezone
|
|
11
|
-
from .db_utils import acquire_with_retry
|
|
12
10
|
|
|
11
|
+
import asyncpg
|
|
12
|
+
|
|
13
|
+
from .db_utils import acquire_with_retry
|
|
13
14
|
|
|
14
15
|
# Load spaCy model (singleton)
|
|
15
16
|
_nlp = None
|
|
@@ -32,11 +33,11 @@ class EntityResolver:
|
|
|
32
33
|
async def resolve_entities_batch(
|
|
33
34
|
self,
|
|
34
35
|
bank_id: str,
|
|
35
|
-
entities_data:
|
|
36
|
+
entities_data: list[dict],
|
|
36
37
|
context: str,
|
|
37
38
|
unit_event_date,
|
|
38
39
|
conn=None,
|
|
39
|
-
) ->
|
|
40
|
+
) -> list[str]:
|
|
40
41
|
"""
|
|
41
42
|
Resolve multiple entities in batch (MUCH faster than sequential).
|
|
42
43
|
|
|
@@ -62,7 +63,9 @@ class EntityResolver:
|
|
|
62
63
|
else:
|
|
63
64
|
return await self._resolve_entities_batch_impl(conn, bank_id, entities_data, context, unit_event_date)
|
|
64
65
|
|
|
65
|
-
async def _resolve_entities_batch_impl(
|
|
66
|
+
async def _resolve_entities_batch_impl(
|
|
67
|
+
self, conn, bank_id: str, entities_data: list[dict], context: str, unit_event_date
|
|
68
|
+
) -> list[str]:
|
|
66
69
|
# Query ALL candidates for this bank
|
|
67
70
|
all_entities = await conn.fetch(
|
|
68
71
|
"""
|
|
@@ -70,11 +73,11 @@ class EntityResolver:
|
|
|
70
73
|
FROM entities
|
|
71
74
|
WHERE bank_id = $1
|
|
72
75
|
""",
|
|
73
|
-
bank_id
|
|
76
|
+
bank_id,
|
|
74
77
|
)
|
|
75
78
|
|
|
76
79
|
# Build entity ID to name mapping for co-occurrence lookups
|
|
77
|
-
entity_id_to_name = {row[
|
|
80
|
+
entity_id_to_name = {row["id"]: row["canonical_name"].lower() for row in all_entities}
|
|
78
81
|
|
|
79
82
|
# Query ALL co-occurrences for this bank's entities in one query
|
|
80
83
|
# This builds a map of entity_id -> set of co-occurring entity names
|
|
@@ -85,13 +88,13 @@ class EntityResolver:
|
|
|
85
88
|
WHERE ec.entity_id_1 IN (SELECT id FROM entities WHERE bank_id = $1)
|
|
86
89
|
OR ec.entity_id_2 IN (SELECT id FROM entities WHERE bank_id = $1)
|
|
87
90
|
""",
|
|
88
|
-
bank_id
|
|
91
|
+
bank_id,
|
|
89
92
|
)
|
|
90
93
|
|
|
91
94
|
# Build co-occurrence map: entity_id -> set of co-occurring entity names (lowercase)
|
|
92
|
-
cooccurrence_map:
|
|
95
|
+
cooccurrence_map: dict[str, set[str]] = {}
|
|
93
96
|
for row in all_cooccurrences:
|
|
94
|
-
eid1, eid2 = row[
|
|
97
|
+
eid1, eid2 = row["entity_id_1"], row["entity_id_2"]
|
|
95
98
|
# Add both directions
|
|
96
99
|
if eid1 not in cooccurrence_map:
|
|
97
100
|
cooccurrence_map[eid1] = set()
|
|
@@ -105,22 +108,24 @@ class EntityResolver:
|
|
|
105
108
|
|
|
106
109
|
# Build candidate map for each entity text
|
|
107
110
|
all_candidates = {} # Maps entity_text -> list of candidates
|
|
108
|
-
entity_texts = list(set(e[
|
|
111
|
+
entity_texts = list(set(e["text"] for e in entities_data))
|
|
109
112
|
|
|
110
113
|
for entity_text in entity_texts:
|
|
111
114
|
matching = []
|
|
112
115
|
entity_text_lower = entity_text.lower()
|
|
113
116
|
for row in all_entities:
|
|
114
|
-
canonical_name = row[
|
|
115
|
-
ent_id = row[
|
|
116
|
-
metadata = row[
|
|
117
|
-
last_seen = row[
|
|
118
|
-
mention_count = row[
|
|
117
|
+
canonical_name = row["canonical_name"]
|
|
118
|
+
ent_id = row["id"]
|
|
119
|
+
metadata = row["metadata"]
|
|
120
|
+
last_seen = row["last_seen"]
|
|
121
|
+
mention_count = row["mention_count"]
|
|
119
122
|
canonical_lower = canonical_name.lower()
|
|
120
123
|
# Match if exact or substring match
|
|
121
|
-
if (
|
|
122
|
-
entity_text_lower
|
|
123
|
-
|
|
124
|
+
if (
|
|
125
|
+
entity_text_lower == canonical_lower
|
|
126
|
+
or entity_text_lower in canonical_lower
|
|
127
|
+
or canonical_lower in entity_text_lower
|
|
128
|
+
):
|
|
124
129
|
matching.append((ent_id, canonical_name, metadata, last_seen, mention_count))
|
|
125
130
|
all_candidates[entity_text] = matching
|
|
126
131
|
|
|
@@ -130,10 +135,10 @@ class EntityResolver:
|
|
|
130
135
|
entities_to_create = [] # (idx, entity_data, event_date)
|
|
131
136
|
|
|
132
137
|
for idx, entity_data in enumerate(entities_data):
|
|
133
|
-
entity_text = entity_data[
|
|
134
|
-
nearby_entities = entity_data.get(
|
|
138
|
+
entity_text = entity_data["text"]
|
|
139
|
+
nearby_entities = entity_data.get("nearby_entities", [])
|
|
135
140
|
# Use per-entity date if available, otherwise fall back to batch-level date
|
|
136
|
-
entity_event_date = entity_data.get(
|
|
141
|
+
entity_event_date = entity_data.get("event_date", unit_event_date)
|
|
137
142
|
|
|
138
143
|
candidates = all_candidates.get(entity_text, [])
|
|
139
144
|
|
|
@@ -146,17 +151,13 @@ class EntityResolver:
|
|
|
146
151
|
best_candidate = None
|
|
147
152
|
best_score = 0.0
|
|
148
153
|
|
|
149
|
-
nearby_entity_set = {e[
|
|
154
|
+
nearby_entity_set = {e["text"].lower() for e in nearby_entities if e["text"] != entity_text}
|
|
150
155
|
|
|
151
156
|
for candidate_id, canonical_name, metadata, last_seen, mention_count in candidates:
|
|
152
157
|
score = 0.0
|
|
153
158
|
|
|
154
159
|
# 1. Name similarity (0-0.5)
|
|
155
|
-
name_similarity = SequenceMatcher(
|
|
156
|
-
None,
|
|
157
|
-
entity_text.lower(),
|
|
158
|
-
canonical_name.lower()
|
|
159
|
-
).ratio()
|
|
160
|
+
name_similarity = SequenceMatcher(None, entity_text.lower(), canonical_name.lower()).ratio()
|
|
160
161
|
score += name_similarity * 0.5
|
|
161
162
|
|
|
162
163
|
# 2. Co-occurring entities (0-0.3)
|
|
@@ -169,8 +170,10 @@ class EntityResolver:
|
|
|
169
170
|
# 3. Temporal proximity (0-0.2)
|
|
170
171
|
if last_seen and entity_event_date:
|
|
171
172
|
# Normalize timezone awareness for comparison
|
|
172
|
-
event_date_utc =
|
|
173
|
-
|
|
173
|
+
event_date_utc = (
|
|
174
|
+
entity_event_date if entity_event_date.tzinfo else entity_event_date.replace(tzinfo=UTC)
|
|
175
|
+
)
|
|
176
|
+
last_seen_utc = last_seen if last_seen.tzinfo else last_seen.replace(tzinfo=UTC)
|
|
174
177
|
days_diff = abs((event_date_utc - last_seen_utc).total_seconds() / 86400)
|
|
175
178
|
if days_diff < 7:
|
|
176
179
|
temporal_score = max(0, 1.0 - (days_diff / 7))
|
|
@@ -198,7 +201,7 @@ class EntityResolver:
|
|
|
198
201
|
last_seen = $2
|
|
199
202
|
WHERE id = $1::uuid
|
|
200
203
|
""",
|
|
201
|
-
entities_to_update
|
|
204
|
+
entities_to_update,
|
|
202
205
|
)
|
|
203
206
|
|
|
204
207
|
# Batch create new entities using COPY + INSERT for maximum speed
|
|
@@ -208,7 +211,7 @@ class EntityResolver:
|
|
|
208
211
|
# For duplicates, we only insert once and reuse the ID
|
|
209
212
|
unique_entities = {} # lowercase_name -> (entity_data, event_date, [indices])
|
|
210
213
|
for idx, entity_data, event_date in entities_to_create:
|
|
211
|
-
name_lower = entity_data[
|
|
214
|
+
name_lower = entity_data["text"].lower()
|
|
212
215
|
if name_lower not in unique_entities:
|
|
213
216
|
unique_entities[name_lower] = (entity_data, event_date, [idx])
|
|
214
217
|
else:
|
|
@@ -222,7 +225,7 @@ class EntityResolver:
|
|
|
222
225
|
indices_map = [] # Maps result index -> list of original indices
|
|
223
226
|
|
|
224
227
|
for name_lower, (entity_data, event_date, indices) in unique_entities.items():
|
|
225
|
-
entity_names.append(entity_data[
|
|
228
|
+
entity_names.append(entity_data["text"])
|
|
226
229
|
entity_dates.append(event_date)
|
|
227
230
|
indices_map.append(indices)
|
|
228
231
|
|
|
@@ -241,12 +244,12 @@ class EntityResolver:
|
|
|
241
244
|
""",
|
|
242
245
|
bank_id,
|
|
243
246
|
entity_names,
|
|
244
|
-
entity_dates
|
|
247
|
+
entity_dates,
|
|
245
248
|
)
|
|
246
249
|
|
|
247
250
|
# Map returned IDs back to original indices
|
|
248
251
|
for result_idx, row in enumerate(rows):
|
|
249
|
-
entity_id = row[
|
|
252
|
+
entity_id = row["id"]
|
|
250
253
|
for original_idx in indices_map[result_idx]:
|
|
251
254
|
entity_ids[original_idx] = entity_id
|
|
252
255
|
|
|
@@ -257,7 +260,7 @@ class EntityResolver:
|
|
|
257
260
|
bank_id: str,
|
|
258
261
|
entity_text: str,
|
|
259
262
|
context: str,
|
|
260
|
-
nearby_entities:
|
|
263
|
+
nearby_entities: list[dict],
|
|
261
264
|
unit_event_date,
|
|
262
265
|
) -> str:
|
|
263
266
|
"""
|
|
@@ -287,14 +290,14 @@ class EntityResolver:
|
|
|
287
290
|
)
|
|
288
291
|
ORDER BY mention_count DESC
|
|
289
292
|
""",
|
|
290
|
-
bank_id,
|
|
293
|
+
bank_id,
|
|
294
|
+
entity_text,
|
|
295
|
+
f"%{entity_text}%",
|
|
291
296
|
)
|
|
292
297
|
|
|
293
298
|
if not candidates:
|
|
294
299
|
# New entity - create it
|
|
295
|
-
return await self._create_entity(
|
|
296
|
-
conn, bank_id, entity_text, unit_event_date
|
|
297
|
-
)
|
|
300
|
+
return await self._create_entity(conn, bank_id, entity_text, unit_event_date)
|
|
298
301
|
|
|
299
302
|
# Score candidates based on:
|
|
300
303
|
# 1. Name similarity
|
|
@@ -306,21 +309,17 @@ class EntityResolver:
|
|
|
306
309
|
best_score = 0.0
|
|
307
310
|
best_name_similarity = 0.0
|
|
308
311
|
|
|
309
|
-
nearby_entity_set = {e[
|
|
312
|
+
nearby_entity_set = {e["text"].lower() for e in nearby_entities if e["text"] != entity_text}
|
|
310
313
|
|
|
311
314
|
for row in candidates:
|
|
312
|
-
candidate_id = row[
|
|
313
|
-
canonical_name = row[
|
|
314
|
-
metadata = row[
|
|
315
|
-
last_seen = row[
|
|
315
|
+
candidate_id = row["id"]
|
|
316
|
+
canonical_name = row["canonical_name"]
|
|
317
|
+
metadata = row["metadata"]
|
|
318
|
+
last_seen = row["last_seen"]
|
|
316
319
|
score = 0.0
|
|
317
320
|
|
|
318
321
|
# 1. Name similarity (0-1)
|
|
319
|
-
name_similarity = SequenceMatcher(
|
|
320
|
-
None,
|
|
321
|
-
entity_text.lower(),
|
|
322
|
-
canonical_name.lower()
|
|
323
|
-
).ratio()
|
|
322
|
+
name_similarity = SequenceMatcher(None, entity_text.lower(), canonical_name.lower()).ratio()
|
|
324
323
|
score += name_similarity * 0.5
|
|
325
324
|
|
|
326
325
|
# 2. Co-occurring entities (0-0.5)
|
|
@@ -338,9 +337,9 @@ class EntityResolver:
|
|
|
338
337
|
)
|
|
339
338
|
WHERE ec.entity_id_1 = $1 OR ec.entity_id_2 = $1
|
|
340
339
|
""",
|
|
341
|
-
candidate_id
|
|
340
|
+
candidate_id,
|
|
342
341
|
)
|
|
343
|
-
co_entities = {r[
|
|
342
|
+
co_entities = {r["canonical_name"].lower() for r in co_entity_rows}
|
|
344
343
|
|
|
345
344
|
# Check overlap with nearby entities
|
|
346
345
|
overlap = len(nearby_entity_set & co_entities)
|
|
@@ -372,14 +371,13 @@ class EntityResolver:
|
|
|
372
371
|
last_seen = $1
|
|
373
372
|
WHERE id = $2
|
|
374
373
|
""",
|
|
375
|
-
unit_event_date,
|
|
374
|
+
unit_event_date,
|
|
375
|
+
best_candidate,
|
|
376
376
|
)
|
|
377
377
|
return best_candidate
|
|
378
378
|
else:
|
|
379
379
|
# Not confident - create new entity
|
|
380
|
-
return await self._create_entity(
|
|
381
|
-
conn, bank_id, entity_text, unit_event_date
|
|
382
|
-
)
|
|
380
|
+
return await self._create_entity(conn, bank_id, entity_text, unit_event_date)
|
|
383
381
|
|
|
384
382
|
async def _create_entity(
|
|
385
383
|
self,
|
|
@@ -413,7 +411,10 @@ class EntityResolver:
|
|
|
413
411
|
last_seen = EXCLUDED.last_seen
|
|
414
412
|
RETURNING id
|
|
415
413
|
""",
|
|
416
|
-
bank_id,
|
|
414
|
+
bank_id,
|
|
415
|
+
entity_text,
|
|
416
|
+
event_date,
|
|
417
|
+
event_date,
|
|
417
418
|
)
|
|
418
419
|
return entity_id
|
|
419
420
|
|
|
@@ -434,7 +435,8 @@ class EntityResolver:
|
|
|
434
435
|
VALUES ($1, $2)
|
|
435
436
|
ON CONFLICT DO NOTHING
|
|
436
437
|
""",
|
|
437
|
-
unit_id,
|
|
438
|
+
unit_id,
|
|
439
|
+
entity_id,
|
|
438
440
|
)
|
|
439
441
|
|
|
440
442
|
# Update co-occurrence cache: find other entities in this unit
|
|
@@ -444,10 +446,11 @@ class EntityResolver:
|
|
|
444
446
|
FROM unit_entities
|
|
445
447
|
WHERE unit_id = $1 AND entity_id != $2
|
|
446
448
|
""",
|
|
447
|
-
unit_id,
|
|
449
|
+
unit_id,
|
|
450
|
+
entity_id,
|
|
448
451
|
)
|
|
449
452
|
|
|
450
|
-
other_entities = [row[
|
|
453
|
+
other_entities = [row["entity_id"] for row in rows]
|
|
451
454
|
|
|
452
455
|
# Update co-occurrences for each pair
|
|
453
456
|
for other_entity_id in other_entities:
|
|
@@ -477,10 +480,11 @@ class EntityResolver:
|
|
|
477
480
|
cooccurrence_count = entity_cooccurrences.cooccurrence_count + 1,
|
|
478
481
|
last_cooccurred = NOW()
|
|
479
482
|
""",
|
|
480
|
-
entity_id_1,
|
|
483
|
+
entity_id_1,
|
|
484
|
+
entity_id_2,
|
|
481
485
|
)
|
|
482
486
|
|
|
483
|
-
async def link_units_to_entities_batch(self, unit_entity_pairs:
|
|
487
|
+
async def link_units_to_entities_batch(self, unit_entity_pairs: list[tuple[str, str]], conn=None):
|
|
484
488
|
"""
|
|
485
489
|
Link multiple memory units to entities in batch (MUCH faster than sequential).
|
|
486
490
|
|
|
@@ -499,7 +503,7 @@ class EntityResolver:
|
|
|
499
503
|
else:
|
|
500
504
|
return await self._link_units_to_entities_batch_impl(conn, unit_entity_pairs)
|
|
501
505
|
|
|
502
|
-
async def _link_units_to_entities_batch_impl(self, conn, unit_entity_pairs:
|
|
506
|
+
async def _link_units_to_entities_batch_impl(self, conn, unit_entity_pairs: list[tuple[str, str]]):
|
|
503
507
|
# Batch insert all unit-entity links
|
|
504
508
|
await conn.executemany(
|
|
505
509
|
"""
|
|
@@ -507,7 +511,7 @@ class EntityResolver:
|
|
|
507
511
|
VALUES ($1, $2)
|
|
508
512
|
ON CONFLICT DO NOTHING
|
|
509
513
|
""",
|
|
510
|
-
unit_entity_pairs
|
|
514
|
+
unit_entity_pairs,
|
|
511
515
|
)
|
|
512
516
|
|
|
513
517
|
# Build map of unit -> entities for co-occurrence calculation
|
|
@@ -524,7 +528,7 @@ class EntityResolver:
|
|
|
524
528
|
entity_list = list(entity_ids) # Convert set to list for iteration
|
|
525
529
|
# For each pair of entities in this unit, create co-occurrence
|
|
526
530
|
for i, entity_id_1 in enumerate(entity_list):
|
|
527
|
-
for entity_id_2 in entity_list[i+1:]:
|
|
531
|
+
for entity_id_2 in entity_list[i + 1 :]:
|
|
528
532
|
# Skip if same entity (shouldn't happen with set, but be safe)
|
|
529
533
|
if entity_id_1 == entity_id_2:
|
|
530
534
|
continue
|
|
@@ -535,7 +539,7 @@ class EntityResolver:
|
|
|
535
539
|
|
|
536
540
|
# Batch update co-occurrences
|
|
537
541
|
if cooccurrence_pairs:
|
|
538
|
-
now = datetime.now(
|
|
542
|
+
now = datetime.now(UTC)
|
|
539
543
|
await conn.executemany(
|
|
540
544
|
"""
|
|
541
545
|
INSERT INTO entity_cooccurrences (entity_id_1, entity_id_2, cooccurrence_count, last_cooccurred)
|
|
@@ -545,10 +549,10 @@ class EntityResolver:
|
|
|
545
549
|
cooccurrence_count = entity_cooccurrences.cooccurrence_count + 1,
|
|
546
550
|
last_cooccurred = EXCLUDED.last_cooccurred
|
|
547
551
|
""",
|
|
548
|
-
[(e1, e2, 1, now) for e1, e2 in cooccurrence_pairs]
|
|
552
|
+
[(e1, e2, 1, now) for e1, e2 in cooccurrence_pairs],
|
|
549
553
|
)
|
|
550
554
|
|
|
551
|
-
async def get_units_by_entity(self, entity_id: str, limit: int = 100) ->
|
|
555
|
+
async def get_units_by_entity(self, entity_id: str, limit: int = 100) -> list[str]:
|
|
552
556
|
"""
|
|
553
557
|
Get all units that mention an entity.
|
|
554
558
|
|
|
@@ -568,15 +572,16 @@ class EntityResolver:
|
|
|
568
572
|
ORDER BY unit_id
|
|
569
573
|
LIMIT $2
|
|
570
574
|
""",
|
|
571
|
-
entity_id,
|
|
575
|
+
entity_id,
|
|
576
|
+
limit,
|
|
572
577
|
)
|
|
573
|
-
return [row[
|
|
578
|
+
return [row["unit_id"] for row in rows]
|
|
574
579
|
|
|
575
580
|
async def get_entity_by_text(
|
|
576
581
|
self,
|
|
577
582
|
bank_id: str,
|
|
578
583
|
entity_text: str,
|
|
579
|
-
) ->
|
|
584
|
+
) -> str | None:
|
|
580
585
|
"""
|
|
581
586
|
Find an entity by text (for query resolution).
|
|
582
587
|
|
|
@@ -596,7 +601,8 @@ class EntityResolver:
|
|
|
596
601
|
ORDER BY mention_count DESC
|
|
597
602
|
LIMIT 1
|
|
598
603
|
""",
|
|
599
|
-
bank_id,
|
|
604
|
+
bank_id,
|
|
605
|
+
entity_text,
|
|
600
606
|
)
|
|
601
607
|
|
|
602
|
-
return row[
|
|
608
|
+
return row["id"] if row else None
|