hindsight-api 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/__init__.py +2 -0
- hindsight_api/alembic/env.py +24 -1
- hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +14 -4
- hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +54 -13
- hindsight_api/alembic/versions/rename_personality_to_disposition.py +18 -7
- hindsight_api/api/http.py +253 -230
- hindsight_api/api/mcp.py +14 -3
- hindsight_api/config.py +11 -0
- hindsight_api/daemon.py +204 -0
- hindsight_api/engine/__init__.py +12 -1
- hindsight_api/engine/entity_resolver.py +38 -37
- hindsight_api/engine/interface.py +592 -0
- hindsight_api/engine/llm_wrapper.py +176 -6
- hindsight_api/engine/memory_engine.py +1092 -293
- hindsight_api/engine/retain/bank_utils.py +13 -12
- hindsight_api/engine/retain/chunk_storage.py +3 -2
- hindsight_api/engine/retain/fact_storage.py +10 -7
- hindsight_api/engine/retain/link_utils.py +17 -16
- hindsight_api/engine/retain/observation_regeneration.py +17 -16
- hindsight_api/engine/retain/orchestrator.py +2 -3
- hindsight_api/engine/retain/types.py +25 -8
- hindsight_api/engine/search/graph_retrieval.py +6 -5
- hindsight_api/engine/search/mpfp_retrieval.py +8 -7
- hindsight_api/engine/search/reranking.py +17 -0
- hindsight_api/engine/search/retrieval.py +12 -11
- hindsight_api/engine/search/think_utils.py +1 -1
- hindsight_api/engine/search/tracer.py +1 -1
- hindsight_api/engine/task_backend.py +32 -0
- hindsight_api/extensions/__init__.py +66 -0
- hindsight_api/extensions/base.py +81 -0
- hindsight_api/extensions/builtin/__init__.py +18 -0
- hindsight_api/extensions/builtin/tenant.py +33 -0
- hindsight_api/extensions/context.py +110 -0
- hindsight_api/extensions/http.py +89 -0
- hindsight_api/extensions/loader.py +125 -0
- hindsight_api/extensions/operation_validator.py +325 -0
- hindsight_api/extensions/tenant.py +63 -0
- hindsight_api/main.py +97 -17
- hindsight_api/mcp_local.py +7 -1
- hindsight_api/migrations.py +54 -10
- hindsight_api/models.py +15 -0
- hindsight_api/pg0.py +1 -1
- {hindsight_api-0.1.11.dist-info → hindsight_api-0.1.13.dist-info}/METADATA +1 -1
- hindsight_api-0.1.13.dist-info/RECORD +75 -0
- hindsight_api-0.1.11.dist-info/RECORD +0 -64
- {hindsight_api-0.1.11.dist-info → hindsight_api-0.1.13.dist-info}/WHEEL +0 -0
- {hindsight_api-0.1.11.dist-info → hindsight_api-0.1.13.dist-info}/entry_points.txt +0 -0
hindsight_api/api/mcp.py
CHANGED
@@ -9,6 +9,7 @@ from fastmcp import FastMCP
 
 from hindsight_api import MemoryEngine
 from hindsight_api.engine.response_models import VALID_RECALL_FACT_TYPES
+from hindsight_api.models import RequestContext
 
 # Configure logging from HINDSIGHT_API_LOG_LEVEL environment variable
 _log_level_str = os.environ.get("HINDSIGHT_API_LOG_LEVEL", "info").lower()
@@ -67,7 +68,11 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
         """
         try:
             bank_id = get_current_bank_id()
-
+            if bank_id is None:
+                return "Error: No bank_id configured"
+            await memory.retain_batch_async(
+                bank_id=bank_id, contents=[{"content": content, "context": context}], request_context=RequestContext()
+            )
             return "Memory stored successfully"
         except Exception as e:
             logger.error(f"Error storing memory: {e}", exc_info=True)
@@ -90,10 +95,16 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
         """
         try:
             bank_id = get_current_bank_id()
+            if bank_id is None:
+                return "Error: No bank_id configured"
             from hindsight_api.engine.memory_engine import Budget
 
             search_result = await memory.recall_async(
-                bank_id=bank_id,
+                bank_id=bank_id,
+                query=query,
+                fact_type=list(VALID_RECALL_FACT_TYPES),
+                budget=Budget.LOW,
+                request_context=RequestContext(),
             )
 
             results = [
@@ -102,7 +113,7 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
                     "text": fact.text,
                     "type": fact.fact_type,
                     "context": fact.context,
-                    "
+                    "occurred_start": fact.occurred_start,
                 }
                 for fact in search_result.results[:max_results]
             ]
hindsight_api/config.py
CHANGED
@@ -33,6 +33,10 @@ ENV_GRAPH_RETRIEVER = "HINDSIGHT_API_GRAPH_RETRIEVER"
 ENV_MCP_LOCAL_BANK_ID = "HINDSIGHT_API_MCP_LOCAL_BANK_ID"
 ENV_MCP_INSTRUCTIONS = "HINDSIGHT_API_MCP_INSTRUCTIONS"
 
+# Optimization flags
+ENV_SKIP_LLM_VERIFICATION = "HINDSIGHT_API_SKIP_LLM_VERIFICATION"
+ENV_LAZY_RERANKER = "HINDSIGHT_API_LAZY_RERANKER"
+
 # Default values
 DEFAULT_DATABASE_URL = "pg0"
 DEFAULT_LLM_PROVIDER = "openai"
@@ -107,6 +111,10 @@ class HindsightConfig:
     # Recall
     graph_retriever: str
 
+    # Optimization flags
+    skip_llm_verification: bool
+    lazy_reranker: bool
+
     @classmethod
     def from_env(cls) -> "HindsightConfig":
         """Create configuration from environment variables."""
@@ -133,6 +141,9 @@ class HindsightConfig:
             mcp_enabled=os.getenv(ENV_MCP_ENABLED, str(DEFAULT_MCP_ENABLED)).lower() == "true",
             # Recall
             graph_retriever=os.getenv(ENV_GRAPH_RETRIEVER, DEFAULT_GRAPH_RETRIEVER),
+            # Optimization flags
+            skip_llm_verification=os.getenv(ENV_SKIP_LLM_VERIFICATION, "false").lower() == "true",
+            lazy_reranker=os.getenv(ENV_LAZY_RERANKER, "false").lower() == "true",
         )
 
     def get_llm_base_url(self) -> str:
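
The two new flags follow the same convention as the existing boolean options: unset, or anything other than "true" (case-insensitive), means disabled. A minimal sketch using only names introduced in the diff above:

```python
import os

from hindsight_api.config import HindsightConfig

# Opt in to both optimizations; each defaults to "false" when unset.
os.environ["HINDSIGHT_API_SKIP_LLM_VERIFICATION"] = "true"
os.environ["HINDSIGHT_API_LAZY_RERANKER"] = "true"

config = HindsightConfig.from_env()
assert config.skip_llm_verification is True
assert config.lazy_reranker is True
```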
hindsight_api/daemon.py
ADDED
@@ -0,0 +1,204 @@
+"""
+Daemon mode support for Hindsight API.
+
+Provides idle timeout and lockfile management for running as a background daemon.
+"""
+
+import asyncio
+import fcntl
+import logging
+import os
+import sys
+import time
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+# Default daemon configuration
+DEFAULT_DAEMON_PORT = 8889
+DEFAULT_IDLE_TIMEOUT = 0  # 0 = no auto-exit (hindsight-embed passes its own timeout)
+LOCKFILE_PATH = Path.home() / ".hindsight" / "daemon.lock"
+DAEMON_LOG_PATH = Path.home() / ".hindsight" / "daemon.log"
+
+
+class IdleTimeoutMiddleware:
+    """ASGI middleware that tracks activity and exits after idle timeout."""
+
+    def __init__(self, app, idle_timeout: int = DEFAULT_IDLE_TIMEOUT):
+        self.app = app
+        self.idle_timeout = idle_timeout
+        self.last_activity = time.time()
+        self._checker_task = None
+
+    async def __call__(self, scope, receive, send):
+        # Update activity timestamp on each request
+        self.last_activity = time.time()
+        await self.app(scope, receive, send)
+
+    def start_idle_checker(self):
+        """Start the background task that checks for idle timeout."""
+        self._checker_task = asyncio.create_task(self._check_idle())
+
+    async def _check_idle(self):
+        """Background task that exits the process after idle timeout."""
+        # If idle_timeout is 0, don't auto-exit
+        if self.idle_timeout <= 0:
+            return
+
+        while True:
+            await asyncio.sleep(30)  # Check every 30 seconds
+            idle_time = time.time() - self.last_activity
+            if idle_time > self.idle_timeout:
+                logger.info(f"Idle timeout reached ({self.idle_timeout}s), shutting down daemon")
+                # Give a moment for any in-flight requests
+                await asyncio.sleep(1)
+                os._exit(0)
+
+
+class DaemonLock:
+    """
+    File-based lock to prevent multiple daemon instances.
+
+    Uses fcntl.flock for atomic locking on Unix systems.
+    """
+
+    def __init__(self, lockfile: Path = LOCKFILE_PATH):
+        self.lockfile = lockfile
+        self._fd = None
+
+    def acquire(self) -> bool:
+        """
+        Try to acquire the daemon lock.
+
+        Returns True if lock acquired, False if another daemon is running.
+        """
+        self.lockfile.parent.mkdir(parents=True, exist_ok=True)
+
+        try:
+            self._fd = open(self.lockfile, "w")
+            fcntl.flock(self._fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+            # Write PID for debugging
+            self._fd.write(str(os.getpid()))
+            self._fd.flush()
+            return True
+        except (IOError, OSError):
+            # Lock is held by another process
+            if self._fd:
+                self._fd.close()
+                self._fd = None
+            return False
+
+    def release(self):
+        """Release the daemon lock."""
+        if self._fd:
+            try:
+                fcntl.flock(self._fd.fileno(), fcntl.LOCK_UN)
+                self._fd.close()
+            except Exception:
+                pass
+            finally:
+                self._fd = None
+            # Remove lockfile
+            try:
+                self.lockfile.unlink()
+            except Exception:
+                pass
+
+    def is_locked(self) -> bool:
+        """Check if the lock is held by another process."""
+        if not self.lockfile.exists():
+            return False
+
+        try:
+            fd = open(self.lockfile, "r")
+            fcntl.flock(fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+            # We got the lock, so no one else has it
+            fcntl.flock(fd.fileno(), fcntl.LOCK_UN)
+            fd.close()
+            return False
+        except (IOError, OSError):
+            return True
+
+    def get_pid(self) -> int | None:
+        """Get the PID of the daemon holding the lock."""
+        if not self.lockfile.exists():
+            return None
+        try:
+            with open(self.lockfile, "r") as f:
+                return int(f.read().strip())
+        except (ValueError, IOError):
+            return None
+
+
+def daemonize():
+    """
+    Fork the current process into a background daemon.
+
+    Uses double-fork technique to properly detach from terminal.
+    """
+    # First fork
+    pid = os.fork()
+    if pid > 0:
+        # Parent exits
+        sys.exit(0)
+
+    # Create new session
+    os.setsid()
+
+    # Second fork to prevent zombie processes
+    pid = os.fork()
+    if pid > 0:
+        sys.exit(0)
+
+    # Redirect standard file descriptors to log file
+    DAEMON_LOG_PATH.parent.mkdir(parents=True, exist_ok=True)
+
+    sys.stdout.flush()
+    sys.stderr.flush()
+
+    # Redirect stdin to /dev/null
+    with open("/dev/null", "r") as devnull:
+        os.dup2(devnull.fileno(), sys.stdin.fileno())
+
+    # Redirect stdout/stderr to log file
+    log_fd = open(DAEMON_LOG_PATH, "a")
+    os.dup2(log_fd.fileno(), sys.stdout.fileno())
+    os.dup2(log_fd.fileno(), sys.stderr.fileno())
+
+
+def check_daemon_running(port: int = DEFAULT_DAEMON_PORT) -> bool:
+    """Check if a daemon is running and responsive on the given port."""
+    import socket
+
+    try:
+        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        sock.settimeout(1)
+        result = sock.connect_ex(("127.0.0.1", port))
+        sock.close()
+        return result == 0
+    except Exception:
+        return False
+
+
+def stop_daemon(port: int = DEFAULT_DAEMON_PORT) -> bool:
+    """Stop a running daemon by sending SIGTERM to the process."""
+    lock = DaemonLock()
+    pid = lock.get_pid()
+
+    if pid is None:
+        return False
+
+    try:
+        import signal
+
+        os.kill(pid, signal.SIGTERM)
+        # Wait for process to exit
+        for _ in range(50):  # Wait up to 5 seconds
+            time.sleep(0.1)
+            try:
+                os.kill(pid, 0)  # Check if process exists
+            except OSError:
+                return True  # Process exited
+        return False
+    except OSError:
+        return False
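
Taken together, the new module covers single-instance locking (DaemonLock), terminal detachment (daemonize), and auto-shutdown (IdleTimeoutMiddleware). A minimal sketch of how these pieces could be wired together, assuming uvicorn as the server; `inner_app` and `run_daemon` are hypothetical, and note that start_idle_checker() must be called once an event loop is running:

```python
import asyncio

import uvicorn

from hindsight_api.daemon import (
    DEFAULT_DAEMON_PORT,
    DaemonLock,
    IdleTimeoutMiddleware,
    check_daemon_running,
    daemonize,
)

async def _serve(inner_app, idle_timeout: int) -> None:
    app = IdleTimeoutMiddleware(inner_app, idle_timeout=idle_timeout)
    app.start_idle_checker()  # safe here: the event loop is running
    config = uvicorn.Config(app, host="127.0.0.1", port=DEFAULT_DAEMON_PORT)
    await uvicorn.Server(config).serve()

def run_daemon(inner_app, idle_timeout: int = 300) -> None:
    if check_daemon_running():
        raise SystemExit("daemon already running")
    daemonize()  # detach via double fork; parent processes exit here
    lock = DaemonLock()
    if not lock.acquire():  # lost a startup race to another daemon
        raise SystemExit(1)
    try:
        asyncio.run(_serve(inner_app, idle_timeout))
    finally:
        lock.release()
```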
hindsight_api/engine/__init__.py
CHANGED
@@ -11,7 +11,13 @@ from .cross_encoder import CrossEncoderModel, LocalSTCrossEncoder, RemoteTEICros
 from .db_utils import acquire_with_retry
 from .embeddings import Embeddings, LocalSTEmbeddings, RemoteTEIEmbeddings
 from .llm_wrapper import LLMConfig
-from .memory_engine import MemoryEngine
+from .memory_engine import (
+    MemoryEngine,
+    UnqualifiedTableError,
+    fq_table,
+    get_current_schema,
+    validate_sql_schema,
+)
 from .response_models import MemoryFact, RecallResult, ReflectResult
 from .search.trace import (
     EntryPoint,
@@ -49,4 +55,9 @@ __all__ = [
     "RecallResult",
     "ReflectResult",
     "MemoryFact",
+    # Schema safety utilities
+    "fq_table",
+    "get_current_schema",
+    "validate_sql_schema",
+    "UnqualifiedTableError",
 ]
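
With the re-exports in place, callers can take the schema-safety helpers directly from hindsight_api.engine instead of reaching into memory_engine. A short sketch; the exact string fq_table returns is an assumption, since the diffs only ever show it interpolated into SQL:

```python
from hindsight_api.engine import fq_table, get_current_schema

# Build a schema-qualified query the same way entity_resolver.py does below.
query = f"""
    SELECT id
    FROM {fq_table("entities")}
    WHERE bank_id = $1
"""
print(get_current_schema(), query)
```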

hindsight_api/engine/entity_resolver.py
CHANGED
@@ -11,6 +11,7 @@ from difflib import SequenceMatcher
 import asyncpg
 
 from .db_utils import acquire_with_retry
+from .memory_engine import fq_table
 
 # Load spaCy model (singleton)
 _nlp = None
@@ -68,9 +69,9 @@ class EntityResolver:
     ) -> list[str]:
         # Query ALL candidates for this bank
         all_entities = await conn.fetch(
-            """
+            f"""
             SELECT canonical_name, id, metadata, last_seen, mention_count
-            FROM entities
+            FROM {fq_table("entities")}
             WHERE bank_id = $1
             """,
             bank_id,
@@ -82,11 +83,11 @@ class EntityResolver:
         # Query ALL co-occurrences for this bank's entities in one query
         # This builds a map of entity_id -> set of co-occurring entity names
         all_cooccurrences = await conn.fetch(
-            """
+            f"""
             SELECT ec.entity_id_1, ec.entity_id_2, ec.cooccurrence_count
-            FROM entity_cooccurrences ec
-            WHERE ec.entity_id_1 IN (SELECT id FROM entities WHERE bank_id = $1)
-               OR ec.entity_id_2 IN (SELECT id FROM entities WHERE bank_id = $1)
+            FROM {fq_table("entity_cooccurrences")} ec
+            WHERE ec.entity_id_1 IN (SELECT id FROM {fq_table("entities")} WHERE bank_id = $1)
+               OR ec.entity_id_2 IN (SELECT id FROM {fq_table("entities")} WHERE bank_id = $1)
             """,
             bank_id,
         )
@@ -195,8 +196,8 @@ class EntityResolver:
         # Batch update existing entities
         if entities_to_update:
             await conn.executemany(
-                """
-                UPDATE entities SET
+                f"""
+                UPDATE {fq_table("entities")} SET
                     mention_count = mention_count + 1,
                     last_seen = $2
                 WHERE id = $1::uuid
@@ -232,13 +233,13 @@ class EntityResolver:
         # Batch INSERT ... ON CONFLICT with RETURNING
         # This is much faster than individual inserts
         rows = await conn.fetch(
-            """
-            INSERT INTO entities (bank_id, canonical_name, first_seen, last_seen, mention_count)
+            f"""
+            INSERT INTO {fq_table("entities")} (bank_id, canonical_name, first_seen, last_seen, mention_count)
             SELECT $1, name, event_date, event_date, 1
             FROM unnest($2::text[], $3::timestamptz[]) AS t(name, event_date)
             ON CONFLICT (bank_id, LOWER(canonical_name))
             DO UPDATE SET
-                mention_count = entities.mention_count + 1,
+                mention_count = {fq_table("entities")}.mention_count + 1,
                 last_seen = EXCLUDED.last_seen
             RETURNING id
             """,
@@ -279,9 +280,9 @@ class EntityResolver:
         async with acquire_with_retry(self.pool) as conn:
             # Find candidate entities with similar name
            candidates = await conn.fetch(
-                """
+                f"""
                 SELECT id, canonical_name, metadata, last_seen
-                FROM entities
+                FROM {fq_table("entities")}
                 WHERE bank_id = $1
                   AND (
                       canonical_name ILIKE $2
@@ -326,10 +327,10 @@ class EntityResolver:
             # Get entities that co-occurred with this candidate before
             # Use the materialized co-occurrence cache for fast lookup
             co_entity_rows = await conn.fetch(
-                """
+                f"""
                 SELECT e.canonical_name, ec.cooccurrence_count
-                FROM entity_cooccurrences ec
-                JOIN entities e ON (
+                FROM {fq_table("entity_cooccurrences")} ec
+                JOIN {fq_table("entities")} e ON (
                     CASE
                         WHEN ec.entity_id_1 = $1 THEN ec.entity_id_2
                         WHEN ec.entity_id_2 = $1 THEN ec.entity_id_1
@@ -365,8 +366,8 @@ class EntityResolver:
         if best_score > threshold:
             # Update entity
             await conn.execute(
-                """
-                UPDATE entities
+                f"""
+                UPDATE {fq_table("entities")}
                 SET mention_count = mention_count + 1,
                     last_seen = $1
                 WHERE id = $2
@@ -402,12 +403,12 @@ class EntityResolver:
             Entity ID
         """
         entity_id = await conn.fetchval(
-            """
-            INSERT INTO entities (bank_id, canonical_name, first_seen, last_seen, mention_count)
+            f"""
+            INSERT INTO {fq_table("entities")} (bank_id, canonical_name, first_seen, last_seen, mention_count)
             VALUES ($1, $2, $3, $4, 1)
             ON CONFLICT (bank_id, LOWER(canonical_name))
             DO UPDATE SET
-                mention_count = entities.mention_count + 1,
+                mention_count = {fq_table("entities")}.mention_count + 1,
                 last_seen = EXCLUDED.last_seen
             RETURNING id
             """,
@@ -430,8 +431,8 @@ class EntityResolver:
         async with acquire_with_retry(self.pool) as conn:
             # Insert unit-entity link
             await conn.execute(
-                """
-                INSERT INTO unit_entities (unit_id, entity_id)
+                f"""
+                INSERT INTO {fq_table("unit_entities")} (unit_id, entity_id)
                 VALUES ($1, $2)
                 ON CONFLICT DO NOTHING
                 """,
@@ -441,9 +442,9 @@ class EntityResolver:
 
             # Update co-occurrence cache: find other entities in this unit
             rows = await conn.fetch(
-                """
+                f"""
                 SELECT entity_id
-                FROM unit_entities
+                FROM {fq_table("unit_entities")}
                 WHERE unit_id = $1 AND entity_id != $2
                 """,
                 unit_id,
@@ -472,12 +473,12 @@ class EntityResolver:
                 entity_id_1, entity_id_2 = entity_id_2, entity_id_1
 
             await conn.execute(
-                """
-                INSERT INTO entity_cooccurrences (entity_id_1, entity_id_2, cooccurrence_count, last_cooccurred)
+                f"""
+                INSERT INTO {fq_table("entity_cooccurrences")} (entity_id_1, entity_id_2, cooccurrence_count, last_cooccurred)
                 VALUES ($1, $2, 1, NOW())
                 ON CONFLICT (entity_id_1, entity_id_2)
                 DO UPDATE SET
-                    cooccurrence_count = entity_cooccurrences.cooccurrence_count + 1,
+                    cooccurrence_count = {fq_table("entity_cooccurrences")}.cooccurrence_count + 1,
                     last_cooccurred = NOW()
                 """,
                 entity_id_1,
@@ -506,8 +507,8 @@ class EntityResolver:
     async def _link_units_to_entities_batch_impl(self, conn, unit_entity_pairs: list[tuple[str, str]]):
         # Batch insert all unit-entity links
         await conn.executemany(
-            """
-            INSERT INTO unit_entities (unit_id, entity_id)
+            f"""
+            INSERT INTO {fq_table("unit_entities")} (unit_id, entity_id)
             VALUES ($1, $2)
             ON CONFLICT DO NOTHING
             """,
@@ -541,12 +542,12 @@ class EntityResolver:
         if cooccurrence_pairs:
             now = datetime.now(UTC)
             await conn.executemany(
-                """
-                INSERT INTO entity_cooccurrences (entity_id_1, entity_id_2, cooccurrence_count, last_cooccurred)
+                f"""
+                INSERT INTO {fq_table("entity_cooccurrences")} (entity_id_1, entity_id_2, cooccurrence_count, last_cooccurred)
                 VALUES ($1, $2, $3, $4)
                 ON CONFLICT (entity_id_1, entity_id_2)
                 DO UPDATE SET
-                    cooccurrence_count = entity_cooccurrences.cooccurrence_count + 1,
+                    cooccurrence_count = {fq_table("entity_cooccurrences")}.cooccurrence_count + 1,
                     last_cooccurred = EXCLUDED.last_cooccurred
                 """,
                 [(e1, e2, 1, now) for e1, e2 in cooccurrence_pairs],
@@ -565,9 +566,9 @@ class EntityResolver:
         """
         async with acquire_with_retry(self.pool) as conn:
             rows = await conn.fetch(
-                """
+                f"""
                 SELECT unit_id
-                FROM unit_entities
+                FROM {fq_table("unit_entities")}
                 WHERE entity_id = $1
                 ORDER BY unit_id
                 LIMIT $2
@@ -594,8 +595,8 @@ class EntityResolver:
         """
         async with acquire_with_retry(self.pool) as conn:
             row = await conn.fetchrow(
-                """
-                SELECT id FROM entities
+                f"""
+                SELECT id FROM {fq_table("entities")}
                 WHERE bank_id = $1
                   AND canonical_name ILIKE $2
                 ORDER BY mention_count DESC