hindsight-api 0.1.12__tar.gz → 0.1.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/PKG-INFO +1 -1
  2. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/api/http.py +19 -2
  3. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/config.py +11 -0
  4. hindsight_api-0.1.14/hindsight_api/daemon.py +204 -0
  5. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/memory_engine.py +125 -102
  6. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/search/reranking.py +17 -0
  7. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/main.py +96 -16
  8. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/pyproject.toml +1 -1
  9. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/.gitignore +0 -0
  10. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/README.md +0 -0
  11. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/__init__.py +0 -0
  12. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/alembic/README +0 -0
  13. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/alembic/env.py +0 -0
  14. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/alembic/script.py.mako +0 -0
  15. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +0 -0
  16. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +0 -0
  17. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +0 -0
  18. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +0 -0
  19. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +0 -0
  20. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/alembic/versions/rename_personality_to_disposition.py +0 -0
  21. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/api/__init__.py +0 -0
  22. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/api/mcp.py +0 -0
  23. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/banner.py +0 -0
  24. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/__init__.py +0 -0
  25. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/cross_encoder.py +0 -0
  26. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/db_utils.py +0 -0
  27. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/embeddings.py +0 -0
  28. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/entity_resolver.py +0 -0
  29. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/interface.py +0 -0
  30. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/llm_wrapper.py +0 -0
  31. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/query_analyzer.py +0 -0
  32. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/response_models.py +0 -0
  33. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/retain/__init__.py +0 -0
  34. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/retain/bank_utils.py +0 -0
  35. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/retain/chunk_storage.py +0 -0
  36. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/retain/deduplication.py +0 -0
  37. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/retain/embedding_processing.py +0 -0
  38. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/retain/embedding_utils.py +0 -0
  39. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/retain/entity_processing.py +0 -0
  40. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/retain/fact_extraction.py +0 -0
  41. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/retain/fact_storage.py +0 -0
  42. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/retain/link_creation.py +0 -0
  43. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/retain/link_utils.py +0 -0
  44. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/retain/observation_regeneration.py +0 -0
  45. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/retain/orchestrator.py +0 -0
  46. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/retain/types.py +0 -0
  47. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/search/__init__.py +0 -0
  48. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/search/fusion.py +0 -0
  49. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/search/graph_retrieval.py +0 -0
  50. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/search/mpfp_retrieval.py +0 -0
  51. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/search/observation_utils.py +0 -0
  52. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/search/retrieval.py +0 -0
  53. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/search/scoring.py +0 -0
  54. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/search/temporal_extraction.py +0 -0
  55. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/search/think_utils.py +0 -0
  56. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/search/trace.py +0 -0
  57. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/search/tracer.py +0 -0
  58. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/search/types.py +0 -0
  59. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/task_backend.py +0 -0
  60. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/engine/utils.py +0 -0
  61. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/extensions/__init__.py +0 -0
  62. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/extensions/base.py +0 -0
  63. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/extensions/builtin/__init__.py +0 -0
  64. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/extensions/builtin/tenant.py +0 -0
  65. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/extensions/context.py +0 -0
  66. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/extensions/http.py +0 -0
  67. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/extensions/loader.py +0 -0
  68. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/extensions/operation_validator.py +0 -0
  69. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/extensions/tenant.py +0 -0
  70. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/mcp_local.py +0 -0
  71. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/metrics.py +0 -0
  72. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/migrations.py +0 -0
  73. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/models.py +0 -0
  74. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/pg0.py +0 -0
  75. {hindsight_api-0.1.12 → hindsight_api-0.1.14}/hindsight_api/server.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hindsight-api
3
- Version: 0.1.12
3
+ Version: 0.1.14
4
4
  Summary: Hindsight: Agent Memory That Works Like Human Memory
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: alembic>=1.17.1
@@ -29,7 +29,7 @@ def _parse_metadata(metadata: Any) -> dict[str, Any]:
29
29
  return {}
30
30
 
31
31
 
32
- from pydantic import BaseModel, ConfigDict, Field
32
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
33
33
 
34
34
  from hindsight_api import MemoryEngine
35
35
  from hindsight_api.engine.db_utils import acquire_with_retry
@@ -291,7 +291,7 @@ class MemoryItem(BaseModel):
291
291
  "metadata": {"source": "slack", "channel": "engineering"},
292
292
  "document_id": "meeting_notes_2024_01_15",
293
293
  }
294
- }
294
+ },
295
295
  )
296
296
 
297
297
  content: str
@@ -300,6 +300,23 @@ class MemoryItem(BaseModel):
300
300
  metadata: dict[str, str] | None = None
301
301
  document_id: str | None = Field(default=None, description="Optional document ID for this memory item.")
302
302
 
303
@field_validator("timestamp", mode="before")
@classmethod
def validate_timestamp(cls, v):
    """Coerce the raw ``timestamp`` input into a ``datetime`` or ``None``.

    ``None`` and the empty string become ``None``; ``datetime`` objects are
    returned unchanged; strings are parsed with ``datetime.fromisoformat``
    after every ``Z`` has been replaced by ``+00:00``.

    Raises:
        ValueError: If a string is not valid ISO format, or if the value is
            neither a string nor a datetime.
    """
    if v is None or v == "":
        return None
    if isinstance(v, datetime):
        return v
    if not isinstance(v, str):
        raise ValueError(f"timestamp must be a string or datetime, got {type(v).__name__}")

    # Try parsing as ISO format
    iso_text = v.replace("Z", "+00:00")
    try:
        return datetime.fromisoformat(iso_text)
    except ValueError as parse_error:
        raise ValueError(
            f"Invalid timestamp/event_date format: '{v}'. Expected ISO format like '2024-01-15T10:30:00' or '2024-01-15T10:30:00Z'"
        ) from parse_error
319
+
303
320
 
304
321
  class RetainRequest(BaseModel):
305
322
  """Request model for retain endpoint."""
@@ -33,6 +33,10 @@ ENV_GRAPH_RETRIEVER = "HINDSIGHT_API_GRAPH_RETRIEVER"
33
33
  ENV_MCP_LOCAL_BANK_ID = "HINDSIGHT_API_MCP_LOCAL_BANK_ID"
34
34
  ENV_MCP_INSTRUCTIONS = "HINDSIGHT_API_MCP_INSTRUCTIONS"
35
35
 
36
+ # Optimization flags
37
+ ENV_SKIP_LLM_VERIFICATION = "HINDSIGHT_API_SKIP_LLM_VERIFICATION"
38
+ ENV_LAZY_RERANKER = "HINDSIGHT_API_LAZY_RERANKER"
39
+
36
40
  # Default values
37
41
  DEFAULT_DATABASE_URL = "pg0"
38
42
  DEFAULT_LLM_PROVIDER = "openai"
@@ -107,6 +111,10 @@ class HindsightConfig:
107
111
  # Recall
108
112
  graph_retriever: str
109
113
 
114
+ # Optimization flags
115
+ skip_llm_verification: bool
116
+ lazy_reranker: bool
117
+
110
118
  @classmethod
111
119
  def from_env(cls) -> "HindsightConfig":
112
120
  """Create configuration from environment variables."""
@@ -133,6 +141,9 @@ class HindsightConfig:
133
141
  mcp_enabled=os.getenv(ENV_MCP_ENABLED, str(DEFAULT_MCP_ENABLED)).lower() == "true",
134
142
  # Recall
135
143
  graph_retriever=os.getenv(ENV_GRAPH_RETRIEVER, DEFAULT_GRAPH_RETRIEVER),
144
+ # Optimization flags
145
+ skip_llm_verification=os.getenv(ENV_SKIP_LLM_VERIFICATION, "false").lower() == "true",
146
+ lazy_reranker=os.getenv(ENV_LAZY_RERANKER, "false").lower() == "true",
136
147
  )
137
148
 
138
149
  def get_llm_base_url(self) -> str:
@@ -0,0 +1,204 @@
1
+ """
2
+ Daemon mode support for Hindsight API.
3
+
4
+ Provides idle timeout and lockfile management for running as a background daemon.
5
+ """
6
+
7
+ import asyncio
8
+ import fcntl
9
+ import logging
10
+ import os
11
+ import sys
12
+ import time
13
+ from pathlib import Path
14
+
15
logger = logging.getLogger(__name__)

# Default daemon configuration
DEFAULT_DAEMON_PORT = 8889
# 0 = no auto-exit (hindsight-embed passes its own timeout)
DEFAULT_IDLE_TIMEOUT = 0
# Lock and log files both live in the per-user ~/.hindsight directory
LOCKFILE_PATH = Path.home().joinpath(".hindsight", "daemon.lock")
DAEMON_LOG_PATH = Path.home().joinpath(".hindsight", "daemon.log")
22
+
23
+
24
class IdleTimeoutMiddleware:
    """ASGI middleware that tracks activity and exits after idle timeout.

    Each call through the middleware refreshes ``last_activity`` when it
    starts and again when it finishes. The idle checker terminates the
    process with ``os._exit(0)`` once nothing has happened for longer than
    ``idle_timeout`` seconds and no request is still being served.
    """

    def __init__(self, app, idle_timeout: int = DEFAULT_IDLE_TIMEOUT):
        """
        Args:
            app: The wrapped ASGI application.
            idle_timeout: Seconds without activity before the process exits.
                A value <= 0 disables the auto-exit.
        """
        self.app = app
        self.idle_timeout = idle_timeout
        self.last_activity = time.time()
        self._checker_task = None
        # Number of non-lifespan calls currently inside the wrapped app
        self._in_flight = 0

    async def __call__(self, scope, receive, send):
        # Update activity timestamp on each request
        self.last_activity = time.time()
        # The lifespan scope stays open for the whole server lifetime, so
        # counting it as a pending request would disable the idle exit.
        counted = scope.get("type") != "lifespan"
        if counted:
            self._in_flight += 1
        try:
            await self.app(scope, receive, send)
        finally:
            if counted:
                self._in_flight -= 1
            # Restart the idle clock when the work ends, not only when it
            # began, so a long request is not mistaken for idleness.
            self.last_activity = time.time()

    def start_idle_checker(self):
        """Start the background task that checks for idle timeout.

        Must be called while an event loop is running, because it uses
        ``asyncio.create_task``.
        """
        self._checker_task = asyncio.create_task(self._check_idle())

    def _is_idle(self) -> bool:
        """Return True when no request is in flight and the timeout has elapsed."""
        if self._in_flight > 0:
            return False
        return time.time() - self.last_activity > self.idle_timeout

    async def _check_idle(self):
        """Background task that exits the process after idle timeout."""
        # If idle_timeout is 0, don't auto-exit
        if self.idle_timeout <= 0:
            return

        while True:
            await asyncio.sleep(30)  # Check every 30 seconds
            if not self._is_idle():
                continue
            # Give a moment for any in-flight requests, then look again so a
            # request that arrived during the grace period cancels the exit.
            await asyncio.sleep(1)
            if not self._is_idle():
                continue
            logger.info(f"Idle timeout reached ({self.idle_timeout}s), shutting down daemon")
            os._exit(0)
56
+
57
+
58
class DaemonLock:
    """
    File-based lock to prevent multiple daemon instances.

    Uses fcntl.flock for atomic locking on Unix systems. The lockfile also
    stores the PID of the process that acquired the lock.
    """

    def __init__(self, lockfile: Path = LOCKFILE_PATH):
        """
        Args:
            lockfile: Path of the file used both as the lock and as PID storage.
        """
        self.lockfile = lockfile
        self._fd = None

    def acquire(self) -> bool:
        """
        Try to acquire the daemon lock.

        Returns True if lock acquired, False if another daemon is running.
        """
        self.lockfile.parent.mkdir(parents=True, exist_ok=True)

        try:
            # Open WITHOUT truncating: the file may hold the PID of a daemon
            # that currently owns the lock, and a failed attempt must not
            # erase it (get_pid() is used to report that PID).
            self._fd = open(self.lockfile, "a+")
            fcntl.flock(self._fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
            # The lock is ours now, so replacing the old content is safe.
            self._fd.truncate(0)
            # Write PID for debugging
            self._fd.write(str(os.getpid()))
            self._fd.flush()
            return True
        except OSError:
            # Lock is held by another process
            if self._fd:
                self._fd.close()
                self._fd = None
            return False

    def release(self):
        """Release the daemon lock and remove the lockfile (best effort)."""
        if not self._fd:
            return

        try:
            fcntl.flock(self._fd.fileno(), fcntl.LOCK_UN)
            self._fd.close()
        except (OSError, ValueError):
            # Best effort: the handle may already be closed or invalid.
            pass
        finally:
            self._fd = None

        # Remove lockfile
        try:
            self.lockfile.unlink()
        except OSError:
            pass

    def is_locked(self) -> bool:
        """Check if the lock is held by another process."""
        try:
            # The context manager closes the probe handle even when flock
            # fails, which is the normal outcome while a daemon is running.
            with open(self.lockfile, "r") as probe:
                fcntl.flock(probe.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
                # We got the lock, so no one else has it
                fcntl.flock(probe.fileno(), fcntl.LOCK_UN)
            return False
        except FileNotFoundError:
            # No lockfile means nobody can be holding the lock.
            return False
        except OSError:
            return True

    def get_pid(self) -> int | None:
        """Get the PID of the daemon holding the lock.

        Returns None when the lockfile is missing, unreadable, or does not
        contain an integer.
        """
        try:
            with open(self.lockfile, "r") as f:
                return int(f.read().strip())
        except (ValueError, OSError):
            return None
131
+
132
+
133
def daemonize():
    """
    Fork the current process into a background daemon.

    Uses double-fork technique to properly detach from terminal. Both
    intermediate parents leave via ``sys.exit(0)``; only the final child
    returns from this function, with stdin reading from /dev/null and
    stdout/stderr appending to DAEMON_LOG_PATH.
    """
    # First fork: parent exits
    if os.fork() > 0:
        sys.exit(0)

    # Create new session
    os.setsid()

    # Second fork: the surviving child is no longer a session leader, so it
    # cannot acquire a controlling terminal again.
    if os.fork() > 0:
        sys.exit(0)

    # Redirect standard file descriptors to log file
    DAEMON_LOG_PATH.parent.mkdir(parents=True, exist_ok=True)

    # Flush first so pending output is not written to the wrong target.
    sys.stdout.flush()
    sys.stderr.flush()

    # Redirect stdin to /dev/null
    with open("/dev/null", "r") as devnull:
        os.dup2(devnull.fileno(), sys.stdin.fileno())

    # Redirect stdout/stderr to log file. dup2 makes the standard
    # descriptors refer to the log, so the extra handle can be closed
    # afterwards instead of staying open for the life of the process.
    with open(DAEMON_LOG_PATH, "a") as log_file:
        os.dup2(log_file.fileno(), sys.stdout.fileno())
        os.dup2(log_file.fileno(), sys.stderr.fileno())
167
+
168
+
169
def check_daemon_running(port: int = DEFAULT_DAEMON_PORT) -> bool:
    """Check if a daemon is running and responsive on the given port.

    Only a TCP connection to 127.0.0.1 is attempted (1 second timeout), so
    any listener on that port counts as "running".

    Args:
        port: TCP port to probe on localhost.

    Returns:
        True if the connection succeeded, False on refusal, timeout or any
        other error.
    """
    import socket

    try:
        # The context manager closes the socket on every path, including
        # when settimeout/connect_ex raises.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.settimeout(1)
            return sock.connect_ex(("127.0.0.1", port)) == 0
    except Exception:
        # Best-effort probe: callers only get a yes/no answer.
        return False
181
+
182
+
183
def stop_daemon(port: int = DEFAULT_DAEMON_PORT) -> bool:
    """Stop a running daemon by sending SIGTERM to the process.

    The PID is read from the default lockfile. The signal is only sent when
    that lock is actually held, so a leftover lockfile whose PID now belongs
    to an unrelated process is never signalled.

    Args:
        port: Not used by this function; the daemon is located through the
            lockfile.

    Returns:
        True if the process exited within about 5 seconds of the signal,
        False if no live daemon was found, the signal could not be sent, or
        the process was still running after the wait.
    """
    import signal

    lock = DaemonLock()
    pid = lock.get_pid()

    # os.kill treats 0 and negative values as process groups, so anything
    # other than a positive PID must never be signalled.
    if pid is None or pid <= 0:
        return False

    # A lockfile can outlive its daemon (for example after a hard exit);
    # without a held lock the recorded PID cannot be trusted.
    if not lock.is_locked():
        return False

    try:
        os.kill(pid, signal.SIGTERM)
    except OSError:
        return False

    # Wait for process to exit
    for _ in range(50):  # Wait up to 5 seconds
        time.sleep(0.1)
        try:
            os.kill(pid, 0)  # Check if process exists
        except OSError:
            return True  # Process exited
    return False
@@ -202,6 +202,8 @@ class MemoryEngine(MemoryEngineInterface):
202
202
  run_migrations: bool = True,
203
203
  operation_validator: "OperationValidatorExtension | None" = None,
204
204
  tenant_extension: "TenantExtension | None" = None,
205
+ skip_llm_verification: bool | None = None,
206
+ lazy_reranker: bool | None = None,
205
207
  ):
206
208
  """
207
209
  Initialize the temporal + semantic memory system.
@@ -227,12 +229,23 @@ class MemoryEngine(MemoryEngineInterface):
227
229
  If provided, retain/recall/reflect operations will be validated.
228
230
  tenant_extension: Optional extension for multi-tenancy and API key authentication.
229
231
  If provided, operations require a RequestContext for authentication.
232
+ skip_llm_verification: Skip LLM connection verification during initialization.
233
+ Defaults to HINDSIGHT_API_SKIP_LLM_VERIFICATION env var or False.
234
+ lazy_reranker: Delay reranker initialization until first use. Useful for retain-only
235
+ operations that don't need the cross-encoder. Defaults to
236
+ HINDSIGHT_API_LAZY_RERANKER env var or False.
230
237
  """
231
238
  # Load config from environment for any missing parameters
232
239
  from ..config import get_config
233
240
 
234
241
  config = get_config()
235
242
 
243
+ # Apply optimization flags from config if not explicitly provided
244
+ self._skip_llm_verification = (
245
+ skip_llm_verification if skip_llm_verification is not None else config.skip_llm_verification
246
+ )
247
+ self._lazy_reranker = lazy_reranker if lazy_reranker is not None else config.lazy_reranker
248
+
236
249
  # Apply defaults from config
237
250
  db_url = db_url or config.database_url
238
251
  memory_llm_provider = memory_llm_provider or config.llm_provider
@@ -396,22 +409,22 @@ class MemoryEngine(MemoryEngineInterface):
396
409
 
397
410
  Args:
398
411
  task_dict: Dict with 'node_ids' key containing list of node IDs to update
412
+
413
+ Raises:
414
+ Exception: Any exception from database operations (propagates to execute_task for retry)
399
415
  """
400
416
  node_ids = task_dict.get("node_ids", [])
401
417
  if not node_ids:
402
418
  return
403
419
 
404
420
  pool = await self._get_pool()
405
- try:
406
- # Convert string UUIDs to UUID type for faster matching
407
- uuid_list = [uuid.UUID(nid) for nid in node_ids]
408
- async with acquire_with_retry(pool) as conn:
409
- await conn.execute(
410
- f"UPDATE {fq_table('memory_units')} SET access_count = access_count + 1 WHERE id = ANY($1::uuid[])",
411
- uuid_list,
412
- )
413
- except Exception as e:
414
- logger.error(f"Access count handler: Error updating access counts: {e}")
421
+ # Convert string UUIDs to UUID type for faster matching
422
+ uuid_list = [uuid.UUID(nid) for nid in node_ids]
423
+ async with acquire_with_retry(pool) as conn:
424
+ await conn.execute(
425
+ f"UPDATE {fq_table('memory_units')} SET access_count = access_count + 1 WHERE id = ANY($1::uuid[])",
426
+ uuid_list,
427
+ )
415
428
 
416
429
  async def _handle_batch_retain(self, task_dict: dict[str, Any]):
417
430
  """
@@ -419,29 +432,27 @@ class MemoryEngine(MemoryEngineInterface):
419
432
 
420
433
  Args:
421
434
  task_dict: Dict with 'bank_id', 'contents'
422
- """
423
- try:
424
- bank_id = task_dict.get("bank_id")
425
- if not bank_id:
426
- raise ValueError("bank_id is required for batch retain task")
427
- contents = task_dict.get("contents", [])
428
435
 
429
- logger.info(
430
- f"[BATCH_RETAIN_TASK] Starting background batch retain for bank_id={bank_id}, {len(contents)} items"
431
- )
436
+ Raises:
437
+ ValueError: If bank_id is missing
438
+ Exception: Any exception from retain_batch_async (propagates to execute_task for retry)
439
+ """
440
+ bank_id = task_dict.get("bank_id")
441
+ if not bank_id:
442
+ raise ValueError("bank_id is required for batch retain task")
443
+ contents = task_dict.get("contents", [])
432
444
 
433
- # Use internal request context for background tasks
434
- from hindsight_api.models import RequestContext
445
+ logger.info(
446
+ f"[BATCH_RETAIN_TASK] Starting background batch retain for bank_id={bank_id}, {len(contents)} items"
447
+ )
435
448
 
436
- internal_context = RequestContext()
437
- await self.retain_batch_async(bank_id=bank_id, contents=contents, request_context=internal_context)
449
+ # Use internal request context for background tasks
450
+ from hindsight_api.models import RequestContext
438
451
 
439
- logger.info(f"[BATCH_RETAIN_TASK] Completed background batch retain for bank_id={bank_id}")
440
- except Exception as e:
441
- logger.error(f"Batch retain handler: Error processing batch retain: {e}")
442
- import traceback
452
+ internal_context = RequestContext()
453
+ await self.retain_batch_async(bank_id=bank_id, contents=contents, request_context=internal_context)
443
454
 
444
- traceback.print_exc()
455
+ logger.info(f"[BATCH_RETAIN_TASK] Completed background batch retain for bank_id={bank_id}")
445
456
 
446
457
  async def execute_task(self, task_dict: dict[str, Any]):
447
458
  """
@@ -594,6 +605,8 @@ class MemoryEngine(MemoryEngineInterface):
594
605
  await loop.run_in_executor(None, lambda: asyncio.run(cross_encoder.initialize()))
595
606
  else:
596
607
  await cross_encoder.initialize()
608
+ # Mark reranker as initialized
609
+ self._cross_encoder_reranker._initialized = True
597
610
 
598
611
  async def init_query_analyzer():
599
612
  """Initialize query analyzer model."""
@@ -602,16 +615,26 @@ class MemoryEngine(MemoryEngineInterface):
602
615
 
603
616
  async def verify_llm():
604
617
  """Verify LLM connection is working."""
605
- await self._llm_config.verify_connection()
618
+ if not self._skip_llm_verification:
619
+ await self._llm_config.verify_connection()
606
620
 
607
- # Run pg0 and all model initializations in parallel
608
- await asyncio.gather(
621
+ # Build list of initialization tasks
622
+ init_tasks = [
609
623
  start_pg0(),
610
624
  init_embeddings(),
611
- init_cross_encoder(),
612
625
  init_query_analyzer(),
613
- verify_llm(),
614
- )
626
+ ]
627
+
628
+ # Only init cross-encoder eagerly if not using lazy initialization
629
+ if not self._lazy_reranker:
630
+ init_tasks.append(init_cross_encoder())
631
+
632
+ # Only verify LLM if not skipping
633
+ if not self._skip_llm_verification:
634
+ init_tasks.append(verify_llm())
635
+
636
+ # Run pg0 and selected model initializations in parallel
637
+ await asyncio.gather(*init_tasks)
615
638
 
616
639
  # Run database migrations if enabled
617
640
  if self._run_migrations:
@@ -1641,6 +1664,9 @@ class MemoryEngine(MemoryEngineInterface):
1641
1664
  step_start = time.time()
1642
1665
  reranker_instance = self._cross_encoder_reranker
1643
1666
 
1667
+ # Ensure reranker is initialized (for lazy initialization mode)
1668
+ await reranker_instance.ensure_initialized()
1669
+
1644
1670
  # Rerank using cross-encoder
1645
1671
  scored_results = reranker_instance.rerank(query, merged_candidates)
1646
1672
 
@@ -3649,90 +3675,87 @@ Guidelines:
3649
3675
  task_dict: Dict with 'bank_id' and either:
3650
3676
  - 'entity_ids' (list): Process multiple entities
3651
3677
  - 'entity_id', 'entity_name': Process single entity (legacy)
3678
+
3679
+ Raises:
3680
+ ValueError: If required fields are missing
3681
+ Exception: Any exception from regenerate_entity_observations (propagates to execute_task for retry)
3652
3682
  """
3653
- try:
3654
- bank_id = task_dict.get("bank_id")
3655
- # Use internal request context for background tasks
3656
- from hindsight_api.models import RequestContext
3683
+ bank_id = task_dict.get("bank_id")
3684
+ # Use internal request context for background tasks
3685
+ from hindsight_api.models import RequestContext
3657
3686
 
3658
- internal_context = RequestContext()
3687
+ internal_context = RequestContext()
3659
3688
 
3660
- # New format: multiple entity_ids
3661
- if "entity_ids" in task_dict:
3662
- entity_ids = task_dict.get("entity_ids", [])
3663
- min_facts = task_dict.get("min_facts", 5)
3689
+ # New format: multiple entity_ids
3690
+ if "entity_ids" in task_dict:
3691
+ entity_ids = task_dict.get("entity_ids", [])
3692
+ min_facts = task_dict.get("min_facts", 5)
3664
3693
 
3665
- if not bank_id or not entity_ids:
3666
- logger.error(f"[OBSERVATIONS] Missing required fields in task: {task_dict}")
3667
- return
3694
+ if not bank_id or not entity_ids:
3695
+ raise ValueError(f"[OBSERVATIONS] Missing required fields in task: {task_dict}")
3668
3696
 
3669
- # Process each entity
3670
- pool = await self._get_pool()
3671
- async with pool.acquire() as conn:
3672
- for entity_id in entity_ids:
3673
- try:
3674
- # Fetch entity name and check fact count
3675
- import uuid as uuid_module
3697
+ # Process each entity
3698
+ pool = await self._get_pool()
3699
+ async with pool.acquire() as conn:
3700
+ for entity_id in entity_ids:
3701
+ try:
3702
+ # Fetch entity name and check fact count
3703
+ import uuid as uuid_module
3676
3704
 
3677
- entity_uuid = uuid_module.UUID(entity_id) if isinstance(entity_id, str) else entity_id
3705
+ entity_uuid = uuid_module.UUID(entity_id) if isinstance(entity_id, str) else entity_id
3678
3706
 
3679
- # First check if entity exists
3680
- entity_exists = await conn.fetchrow(
3681
- f"SELECT canonical_name FROM {fq_table('entities')} WHERE id = $1 AND bank_id = $2",
3682
- entity_uuid,
3683
- bank_id,
3684
- )
3707
+ # First check if entity exists
3708
+ entity_exists = await conn.fetchrow(
3709
+ f"SELECT canonical_name FROM {fq_table('entities')} WHERE id = $1 AND bank_id = $2",
3710
+ entity_uuid,
3711
+ bank_id,
3712
+ )
3685
3713
 
3686
- if not entity_exists:
3687
- logger.debug(f"[OBSERVATIONS] Entity {entity_id} not yet in bank {bank_id}, skipping")
3688
- continue
3714
+ if not entity_exists:
3715
+ logger.debug(f"[OBSERVATIONS] Entity {entity_id} not yet in bank {bank_id}, skipping")
3716
+ continue
3689
3717
 
3690
- entity_name = entity_exists["canonical_name"]
3718
+ entity_name = entity_exists["canonical_name"]
3691
3719
 
3692
- # Count facts linked to this entity
3693
- fact_count = (
3694
- await conn.fetchval(
3695
- f"SELECT COUNT(*) FROM {fq_table('unit_entities')} WHERE entity_id = $1",
3696
- entity_uuid,
3697
- )
3698
- or 0
3720
+ # Count facts linked to this entity
3721
+ fact_count = (
3722
+ await conn.fetchval(
3723
+ f"SELECT COUNT(*) FROM {fq_table('unit_entities')} WHERE entity_id = $1",
3724
+ entity_uuid,
3699
3725
  )
3726
+ or 0
3727
+ )
3700
3728
 
3701
- # Only regenerate if entity has enough facts
3702
- if fact_count >= min_facts:
3703
- await self.regenerate_entity_observations(
3704
- bank_id, entity_id, entity_name, version=None, request_context=internal_context
3705
- )
3706
- else:
3707
- logger.debug(
3708
- f"[OBSERVATIONS] Skipping {entity_name} ({fact_count} facts < {min_facts} threshold)"
3709
- )
3710
-
3711
- except Exception as e:
3712
- logger.error(f"[OBSERVATIONS] Error processing entity {entity_id}: {e}")
3713
- continue
3714
-
3715
- # Legacy format: single entity
3716
- else:
3717
- entity_id = task_dict.get("entity_id")
3718
- entity_name = task_dict.get("entity_name")
3719
- version = task_dict.get("version")
3729
+ # Only regenerate if entity has enough facts
3730
+ if fact_count >= min_facts:
3731
+ await self.regenerate_entity_observations(
3732
+ bank_id, entity_id, entity_name, version=None, request_context=internal_context
3733
+ )
3734
+ else:
3735
+ logger.debug(
3736
+ f"[OBSERVATIONS] Skipping {entity_name} ({fact_count} facts < {min_facts} threshold)"
3737
+ )
3720
3738
 
3721
- if not all([bank_id, entity_id, entity_name]):
3722
- logger.error(f"[OBSERVATIONS] Missing required fields in task: {task_dict}")
3723
- return
3739
+ except Exception as e:
3740
+ # Log but continue processing other entities - individual entity failures
3741
+ # shouldn't fail the whole batch
3742
+ logger.error(f"[OBSERVATIONS] Error processing entity {entity_id}: {e}")
3743
+ continue
3724
3744
 
3725
- # Type assertions after validation
3726
- assert isinstance(bank_id, str) and isinstance(entity_id, str) and isinstance(entity_name, str)
3727
- await self.regenerate_entity_observations(
3728
- bank_id, entity_id, entity_name, version=version, request_context=internal_context
3729
- )
3745
+ # Legacy format: single entity
3746
+ else:
3747
+ entity_id = task_dict.get("entity_id")
3748
+ entity_name = task_dict.get("entity_name")
3749
+ version = task_dict.get("version")
3730
3750
 
3731
- except Exception as e:
3732
- logger.error(f"[OBSERVATIONS] Error regenerating observations: {e}")
3733
- import traceback
3751
+ if not all([bank_id, entity_id, entity_name]):
3752
+ raise ValueError(f"[OBSERVATIONS] Missing required fields in task: {task_dict}")
3734
3753
 
3735
- traceback.print_exc()
3754
+ # Type assertions after validation
3755
+ assert isinstance(bank_id, str) and isinstance(entity_id, str) and isinstance(entity_name, str)
3756
+ await self.regenerate_entity_observations(
3757
+ bank_id, entity_id, entity_name, version=version, request_context=internal_context
3758
+ )
3736
3759
 
3737
3760
  # =========================================================================
3738
3761
  # Statistics & Operations (for HTTP API layer)
@@ -26,6 +26,23 @@ class CrossEncoderReranker:
26
26
 
27
27
  cross_encoder = create_cross_encoder_from_env()
28
28
  self.cross_encoder = cross_encoder
29
+ self._initialized = False
30
+
31
async def ensure_initialized(self):
    """Ensure the cross-encoder model is initialized (for lazy initialization).

    Returns immediately once ``_initialized`` is set. Otherwise the
    cross-encoder's ``initialize()`` is awaited exactly once, even when
    several coroutines call this method at the same time, and
    ``_initialized`` is set to True afterwards.
    """
    if self._initialized:
        return

    import asyncio

    # Created on first use so instances built before this attribute existed
    # keep working. There is no await between the check and the assignment,
    # so coroutines on the same loop cannot create two locks.
    lock = getattr(self, "_init_lock", None)
    if lock is None:
        lock = self._init_lock = asyncio.Lock()

    async with lock:
        # Another caller may have finished while this one waited.
        if self._initialized:
            return

        cross_encoder = self.cross_encoder
        # For local providers, run in thread pool to avoid blocking event loop
        if cross_encoder.provider_name == "local":
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(None, lambda: asyncio.run(cross_encoder.initialize()))
        else:
            await cross_encoder.initialize()
        self._initialized = True
29
46
 
30
47
  def rerank(self, query: str, candidates: list[MergedCandidate]) -> list[ScoredResult]:
31
48
  """
@@ -4,6 +4,9 @@ Command-line interface for Hindsight API.
4
4
  Run the server with:
5
5
  hindsight-api
6
6
 
7
+ Run as background daemon:
8
+ hindsight-api --daemon
9
+
7
10
  Stop with Ctrl+C.
8
11
  """
9
12
 
@@ -21,9 +24,13 @@ from . import MemoryEngine
21
24
  from .api import create_app
22
25
  from .banner import print_banner
23
26
  from .config import HindsightConfig, get_config
24
-
25
- print()
26
- print_banner()
27
+ from .daemon import (
28
+ DEFAULT_DAEMON_PORT,
29
+ DEFAULT_IDLE_TIMEOUT,
30
+ DaemonLock,
31
+ IdleTimeoutMiddleware,
32
+ daemonize,
33
+ )
27
34
 
28
35
  # Filter deprecation warnings from third-party libraries
29
36
  warnings.filterwarnings("ignore", message="websockets.legacy is deprecated")
@@ -106,8 +113,52 @@ def main():
106
113
  parser.add_argument("--ssl-keyfile", default=None, help="SSL key file")
107
114
  parser.add_argument("--ssl-certfile", default=None, help="SSL certificate file")
108
115
 
116
+ # Daemon mode options
117
+ parser.add_argument(
118
+ "--daemon",
119
+ action="store_true",
120
+ help=f"Run as background daemon (uses port {DEFAULT_DAEMON_PORT}, auto-exits after idle)",
121
+ )
122
+ parser.add_argument(
123
+ "--idle-timeout",
124
+ type=int,
125
+ default=DEFAULT_IDLE_TIMEOUT,
126
+ help=f"Idle timeout in seconds before auto-exit in daemon mode (default: {DEFAULT_IDLE_TIMEOUT})",
127
+ )
128
+
109
129
  args = parser.parse_args()
110
130
 
131
+ # Daemon mode handling
132
+ if args.daemon:
133
+ # Use fixed daemon port
134
+ args.port = DEFAULT_DAEMON_PORT
135
+ args.host = "127.0.0.1" # Only bind to localhost for security
136
+
137
+ # Check if another daemon is already running
138
+ daemon_lock = DaemonLock()
139
+ if not daemon_lock.acquire():
140
+ print(f"Daemon already running (PID: {daemon_lock.get_pid()})", file=sys.stderr)
141
+ sys.exit(1)
142
+
143
+ # Fork into background
144
+ daemonize()
145
+
146
+ # Re-acquire lock in child process
147
+ daemon_lock = DaemonLock()
148
+ if not daemon_lock.acquire():
149
+ sys.exit(1)
150
+
151
+ # Register cleanup to release lock
152
+ def release_lock():
153
+ daemon_lock.release()
154
+
155
+ atexit.register(release_lock)
156
+
157
+ # Print banner (not in daemon mode)
158
+ if not args.daemon:
159
+ print()
160
+ print_banner()
161
+
111
162
  # Configure Python logging based on log level
112
163
  # Update config with CLI override if provided
113
164
  if args.log_level != config.log_level:
@@ -128,9 +179,12 @@ def main():
128
179
  log_level=args.log_level,
129
180
  mcp_enabled=config.mcp_enabled,
130
181
  graph_retriever=config.graph_retriever,
182
+ skip_llm_verification=config.skip_llm_verification,
183
+ lazy_reranker=config.lazy_reranker,
131
184
  )
132
185
  config.configure_logging()
133
- config.log_config()
186
+ if not args.daemon:
187
+ config.log_config()
134
188
 
135
189
  # Register cleanup handlers
136
190
  atexit.register(_cleanup)
@@ -149,6 +203,12 @@ def main():
149
203
  initialize_memory=True,
150
204
  )
151
205
 
206
+ # Wrap with idle timeout middleware in daemon mode
207
+ idle_middleware = None
208
+ if args.daemon:
209
+ idle_middleware = IdleTimeoutMiddleware(app, idle_timeout=args.idle_timeout)
210
+ app = idle_middleware
211
+
152
212
  # Prepare uvicorn config
153
213
  uvicorn_config = {
154
214
  "app": app,
@@ -172,18 +232,38 @@ def main():
172
232
  if args.ssl_certfile:
173
233
  uvicorn_config["ssl_certfile"] = args.ssl_certfile
174
234
 
175
- from .banner import print_startup_info
176
-
177
- print_startup_info(
178
- host=args.host,
179
- port=args.port,
180
- database_url=config.database_url,
181
- llm_provider=config.llm_provider,
182
- llm_model=config.llm_model,
183
- embeddings_provider=config.embeddings_provider,
184
- reranker_provider=config.reranker_provider,
185
- mcp_enabled=config.mcp_enabled,
186
- )
235
+ # Print startup info (not in daemon mode)
236
+ if not args.daemon:
237
+ from .banner import print_startup_info
238
+
239
+ print_startup_info(
240
+ host=args.host,
241
+ port=args.port,
242
+ database_url=config.database_url,
243
+ llm_provider=config.llm_provider,
244
+ llm_model=config.llm_model,
245
+ embeddings_provider=config.embeddings_provider,
246
+ reranker_provider=config.reranker_provider,
247
+ mcp_enabled=config.mcp_enabled,
248
+ )
249
+
250
+ # Start idle checker in daemon mode
251
+ if idle_middleware is not None:
252
+ # Start the idle checker in a background thread with its own event loop
253
+ import threading
254
+
255
+ def run_idle_checker():
256
+ import time
257
+
258
+ time.sleep(2) # Wait for uvicorn to start
259
+ try:
260
+ loop = asyncio.new_event_loop()
261
+ asyncio.set_event_loop(loop)
262
+ loop.run_until_complete(idle_middleware._check_idle())
263
+ except Exception:
264
+ pass
265
+
266
+ threading.Thread(target=run_idle_checker, daemon=True).start()
187
267
 
188
268
  uvicorn.run(**uvicorn_config) # type: ignore[invalid-argument-type] - dict kwargs
189
269
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "hindsight-api"
7
- version = "0.1.12"
7
+ version = "0.1.14"
8
8
  description = "Hindsight: Agent Memory That Works Like Human Memory"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
File without changes