hindsight-api 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hindsight_api/__init__.py CHANGED
@@ -46,4 +46,4 @@ __all__ = [
      "RemoteTEICrossEncoder",
      "LLMConfig",
  ]
- __version__ = "0.1.0"
+ __version__ = "0.4.2"
hindsight_api/api/http.py CHANGED
@@ -1323,7 +1323,7 @@ class VersionResponse(BaseModel):
  model_config = ConfigDict(
      json_schema_extra={
          "example": {
-             "api_version": "1.0.0",
+             "api_version": "0.4.0",
              "features": {
                  "observations": False,
                  "mcp": True,
@@ -1567,11 +1567,12 @@ def _register_routes(app: FastAPI):
  Returns version info and feature flags that can be used by clients
  to determine which capabilities are available.
  """
+ from hindsight_api import __version__
  from hindsight_api.config import get_config

  config = get_config()
  return VersionResponse(
-     api_version="1.0.0",
+     api_version=__version__,
      features=FeaturesInfo(
          observations=config.enable_observations,
          mcp=config.mcp_enabled,
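Editor's note: with the hunk above, the endpoint reports the real package version instead of a hardcoded "1.0.0". A minimal client-side sketch of consuming it; the `/version` path, host, and the use of httpx are assumptions for illustration, only the response fields come from the `VersionResponse` example above:

    # Sketch: read the version endpoint and branch on feature flags.
    # Route path and base URL are assumed, not taken from this diff.
    import httpx

    resp = httpx.get("http://localhost:8000/version")
    resp.raise_for_status()
    info = resp.json()
    print(info["api_version"])          # e.g. "0.4.2"
    if info["features"].get("mcp"):
        print("MCP support is enabled")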
hindsight_api/config.py CHANGED
@@ -20,11 +20,15 @@ logger = logging.getLogger(__name__)

  # Environment variable names
  ENV_DATABASE_URL = "HINDSIGHT_API_DATABASE_URL"
+ ENV_DATABASE_SCHEMA = "HINDSIGHT_API_DATABASE_SCHEMA"
  ENV_LLM_PROVIDER = "HINDSIGHT_API_LLM_PROVIDER"
  ENV_LLM_API_KEY = "HINDSIGHT_API_LLM_API_KEY"
  ENV_LLM_MODEL = "HINDSIGHT_API_LLM_MODEL"
  ENV_LLM_BASE_URL = "HINDSIGHT_API_LLM_BASE_URL"
  ENV_LLM_MAX_CONCURRENT = "HINDSIGHT_API_LLM_MAX_CONCURRENT"
+ ENV_LLM_MAX_RETRIES = "HINDSIGHT_API_LLM_MAX_RETRIES"
+ ENV_LLM_INITIAL_BACKOFF = "HINDSIGHT_API_LLM_INITIAL_BACKOFF"
+ ENV_LLM_MAX_BACKOFF = "HINDSIGHT_API_LLM_MAX_BACKOFF"
  ENV_LLM_TIMEOUT = "HINDSIGHT_API_LLM_TIMEOUT"
  ENV_LLM_GROQ_SERVICE_TIER = "HINDSIGHT_API_LLM_GROQ_SERVICE_TIER"

@@ -33,19 +37,35 @@ ENV_RETAIN_LLM_PROVIDER = "HINDSIGHT_API_RETAIN_LLM_PROVIDER"
  ENV_RETAIN_LLM_API_KEY = "HINDSIGHT_API_RETAIN_LLM_API_KEY"
  ENV_RETAIN_LLM_MODEL = "HINDSIGHT_API_RETAIN_LLM_MODEL"
  ENV_RETAIN_LLM_BASE_URL = "HINDSIGHT_API_RETAIN_LLM_BASE_URL"
+ ENV_RETAIN_LLM_MAX_CONCURRENT = "HINDSIGHT_API_RETAIN_LLM_MAX_CONCURRENT"
+ ENV_RETAIN_LLM_MAX_RETRIES = "HINDSIGHT_API_RETAIN_LLM_MAX_RETRIES"
+ ENV_RETAIN_LLM_INITIAL_BACKOFF = "HINDSIGHT_API_RETAIN_LLM_INITIAL_BACKOFF"
+ ENV_RETAIN_LLM_MAX_BACKOFF = "HINDSIGHT_API_RETAIN_LLM_MAX_BACKOFF"
+ ENV_RETAIN_LLM_TIMEOUT = "HINDSIGHT_API_RETAIN_LLM_TIMEOUT"

  ENV_REFLECT_LLM_PROVIDER = "HINDSIGHT_API_REFLECT_LLM_PROVIDER"
  ENV_REFLECT_LLM_API_KEY = "HINDSIGHT_API_REFLECT_LLM_API_KEY"
  ENV_REFLECT_LLM_MODEL = "HINDSIGHT_API_REFLECT_LLM_MODEL"
  ENV_REFLECT_LLM_BASE_URL = "HINDSIGHT_API_REFLECT_LLM_BASE_URL"
+ ENV_REFLECT_LLM_MAX_CONCURRENT = "HINDSIGHT_API_REFLECT_LLM_MAX_CONCURRENT"
+ ENV_REFLECT_LLM_MAX_RETRIES = "HINDSIGHT_API_REFLECT_LLM_MAX_RETRIES"
+ ENV_REFLECT_LLM_INITIAL_BACKOFF = "HINDSIGHT_API_REFLECT_LLM_INITIAL_BACKOFF"
+ ENV_REFLECT_LLM_MAX_BACKOFF = "HINDSIGHT_API_REFLECT_LLM_MAX_BACKOFF"
+ ENV_REFLECT_LLM_TIMEOUT = "HINDSIGHT_API_REFLECT_LLM_TIMEOUT"

  ENV_CONSOLIDATION_LLM_PROVIDER = "HINDSIGHT_API_CONSOLIDATION_LLM_PROVIDER"
  ENV_CONSOLIDATION_LLM_API_KEY = "HINDSIGHT_API_CONSOLIDATION_LLM_API_KEY"
  ENV_CONSOLIDATION_LLM_MODEL = "HINDSIGHT_API_CONSOLIDATION_LLM_MODEL"
  ENV_CONSOLIDATION_LLM_BASE_URL = "HINDSIGHT_API_CONSOLIDATION_LLM_BASE_URL"
+ ENV_CONSOLIDATION_LLM_MAX_CONCURRENT = "HINDSIGHT_API_CONSOLIDATION_LLM_MAX_CONCURRENT"
+ ENV_CONSOLIDATION_LLM_MAX_RETRIES = "HINDSIGHT_API_CONSOLIDATION_LLM_MAX_RETRIES"
+ ENV_CONSOLIDATION_LLM_INITIAL_BACKOFF = "HINDSIGHT_API_CONSOLIDATION_LLM_INITIAL_BACKOFF"
+ ENV_CONSOLIDATION_LLM_MAX_BACKOFF = "HINDSIGHT_API_CONSOLIDATION_LLM_MAX_BACKOFF"
+ ENV_CONSOLIDATION_LLM_TIMEOUT = "HINDSIGHT_API_CONSOLIDATION_LLM_TIMEOUT"

  ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
  ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
+ ENV_EMBEDDINGS_LOCAL_FORCE_CPU = "HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU"
  ENV_EMBEDDINGS_TEI_URL = "HINDSIGHT_API_EMBEDDINGS_TEI_URL"
  ENV_EMBEDDINGS_OPENAI_API_KEY = "HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY"
  ENV_EMBEDDINGS_OPENAI_MODEL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL"
@@ -65,6 +85,7 @@ ENV_RERANKER_LITELLM_MODEL = "HINDSIGHT_API_RERANKER_LITELLM_MODEL"

  ENV_RERANKER_PROVIDER = "HINDSIGHT_API_RERANKER_PROVIDER"
  ENV_RERANKER_LOCAL_MODEL = "HINDSIGHT_API_RERANKER_LOCAL_MODEL"
+ ENV_RERANKER_LOCAL_FORCE_CPU = "HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"
  ENV_RERANKER_LOCAL_MAX_CONCURRENT = "HINDSIGHT_API_RERANKER_LOCAL_MAX_CONCURRENT"
  ENV_RERANKER_TEI_URL = "HINDSIGHT_API_RERANKER_TEI_URL"
  ENV_RERANKER_TEI_BATCH_SIZE = "HINDSIGHT_API_RERANKER_TEI_BATCH_SIZE"
@@ -98,6 +119,7 @@ ENV_RETAIN_OBSERVATIONS_ASYNC = "HINDSIGHT_API_RETAIN_OBSERVATIONS_ASYNC"
  # Observations settings (consolidated knowledge from facts)
  ENV_ENABLE_OBSERVATIONS = "HINDSIGHT_API_ENABLE_OBSERVATIONS"
  ENV_CONSOLIDATION_BATCH_SIZE = "HINDSIGHT_API_CONSOLIDATION_BATCH_SIZE"
+ ENV_CONSOLIDATION_MAX_TOKENS = "HINDSIGHT_API_CONSOLIDATION_MAX_TOKENS"

  # Optimization flags
  ENV_SKIP_LLM_VERIFICATION = "HINDSIGHT_API_SKIP_LLM_VERIFICATION"
@@ -125,18 +147,24 @@ ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"

  # Default values
  DEFAULT_DATABASE_URL = "pg0"
+ DEFAULT_DATABASE_SCHEMA = "public"
  DEFAULT_LLM_PROVIDER = "openai"
  DEFAULT_LLM_MODEL = "gpt-5-mini"
  DEFAULT_LLM_MAX_CONCURRENT = 32
+ DEFAULT_LLM_MAX_RETRIES = 10  # Max retry attempts for LLM API calls
+ DEFAULT_LLM_INITIAL_BACKOFF = 1.0  # Initial backoff in seconds for retry exponential backoff
+ DEFAULT_LLM_MAX_BACKOFF = 60.0  # Max backoff cap in seconds for retry exponential backoff
  DEFAULT_LLM_TIMEOUT = 120.0  # seconds

  DEFAULT_EMBEDDINGS_PROVIDER = "local"
  DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
+ DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU = False  # Force CPU mode for local embeddings (avoids MPS/XPC issues on macOS)
  DEFAULT_EMBEDDINGS_OPENAI_MODEL = "text-embedding-3-small"
  DEFAULT_EMBEDDING_DIMENSION = 384

  DEFAULT_RERANKER_PROVIDER = "local"
  DEFAULT_RERANKER_LOCAL_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
+ DEFAULT_RERANKER_LOCAL_FORCE_CPU = False  # Force CPU mode for local reranker (avoids MPS/XPC issues on macOS)
  DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT = 4  # Limit concurrent CPU-bound reranking to prevent thrashing
  DEFAULT_RERANKER_TEI_BATCH_SIZE = 128
  DEFAULT_RERANKER_TEI_MAX_CONCURRENT = 8
@@ -177,6 +205,7 @@ DEFAULT_RETAIN_OBSERVATIONS_ASYNC = False  # Run observation generation async (a
  # Observations defaults (consolidated knowledge from facts)
  DEFAULT_ENABLE_OBSERVATIONS = True  # Observations enabled by default
  DEFAULT_CONSOLIDATION_BATCH_SIZE = 50  # Memories to load per batch (internal memory optimization)
+ DEFAULT_CONSOLIDATION_MAX_TOKENS = 1024  # Max tokens for recall when finding related observations

  # Database migrations
  DEFAULT_RUN_MIGRATIONS_ON_STARTUP = True
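Editor's note: the new retry defaults read as a classic capped exponential backoff. Assuming the usual doubling schedule (the retry loop itself is not part of this diff), the delays these three constants imply would be:

    # Sketch of the schedule implied by the defaults; doubling per attempt
    # is an assumption, since the retry loop is outside this diff.
    initial, cap, retries = 1.0, 60.0, 10  # _INITIAL_BACKOFF, _MAX_BACKOFF, _MAX_RETRIES
    delays = [min(initial * 2**attempt, cap) for attempt in range(retries)]
    print(delays)  # [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 60.0, 60.0, 60.0, 60.0]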
@@ -270,6 +299,7 @@ class HindsightConfig:

  # Database
  database_url: str
+ database_schema: str

  # LLM (default, used as fallback for per-operation config)
  llm_provider: str
@@ -277,6 +307,9 @@
  llm_model: str
  llm_base_url: str | None
  llm_max_concurrent: int
+ llm_max_retries: int
+ llm_initial_backoff: float
+ llm_max_backoff: float
  llm_timeout: float

  # Per-operation LLM configuration (None = use default LLM config)
@@ -284,20 +317,36 @@
  retain_llm_api_key: str | None
  retain_llm_model: str | None
  retain_llm_base_url: str | None
+ retain_llm_max_concurrent: int | None
+ retain_llm_max_retries: int | None
+ retain_llm_initial_backoff: float | None
+ retain_llm_max_backoff: float | None
+ retain_llm_timeout: float | None

  reflect_llm_provider: str | None
  reflect_llm_api_key: str | None
  reflect_llm_model: str | None
  reflect_llm_base_url: str | None
+ reflect_llm_max_concurrent: int | None
+ reflect_llm_max_retries: int | None
+ reflect_llm_initial_backoff: float | None
+ reflect_llm_max_backoff: float | None
+ reflect_llm_timeout: float | None

  consolidation_llm_provider: str | None
  consolidation_llm_api_key: str | None
  consolidation_llm_model: str | None
  consolidation_llm_base_url: str | None
+ consolidation_llm_max_concurrent: int | None
+ consolidation_llm_max_retries: int | None
+ consolidation_llm_initial_backoff: float | None
+ consolidation_llm_max_backoff: float | None
+ consolidation_llm_timeout: float | None

  # Embeddings
  embeddings_provider: str
  embeddings_local_model: str
+ embeddings_local_force_cpu: bool
  embeddings_tei_url: str | None
  embeddings_openai_base_url: str | None
  embeddings_cohere_base_url: str | None
@@ -305,6 +354,8 @@
  # Reranker
  reranker_provider: str
  reranker_local_model: str
+ reranker_local_force_cpu: bool
+ reranker_local_max_concurrent: int
  reranker_tei_url: str | None
  reranker_tei_batch_size: int
  reranker_tei_max_concurrent: int
@@ -336,6 +387,7 @@
  # Observations settings (consolidated knowledge from facts)
  enable_observations: bool
  consolidation_batch_size: int
+ consolidation_max_tokens: int

  # Optimization flags
  skip_llm_verification: bool
@@ -367,35 +419,93 @@
  return cls(
      # Database
      database_url=os.getenv(ENV_DATABASE_URL, DEFAULT_DATABASE_URL),
+     database_schema=os.getenv(ENV_DATABASE_SCHEMA, DEFAULT_DATABASE_SCHEMA),
      # LLM
      llm_provider=os.getenv(ENV_LLM_PROVIDER, DEFAULT_LLM_PROVIDER),
      llm_api_key=os.getenv(ENV_LLM_API_KEY),
      llm_model=os.getenv(ENV_LLM_MODEL, DEFAULT_LLM_MODEL),
      llm_base_url=os.getenv(ENV_LLM_BASE_URL) or None,
      llm_max_concurrent=int(os.getenv(ENV_LLM_MAX_CONCURRENT, str(DEFAULT_LLM_MAX_CONCURRENT))),
+     llm_max_retries=int(os.getenv(ENV_LLM_MAX_RETRIES, str(DEFAULT_LLM_MAX_RETRIES))),
+     llm_initial_backoff=float(os.getenv(ENV_LLM_INITIAL_BACKOFF, str(DEFAULT_LLM_INITIAL_BACKOFF))),
+     llm_max_backoff=float(os.getenv(ENV_LLM_MAX_BACKOFF, str(DEFAULT_LLM_MAX_BACKOFF))),
      llm_timeout=float(os.getenv(ENV_LLM_TIMEOUT, str(DEFAULT_LLM_TIMEOUT))),
      # Per-operation LLM config (None = use default)
      retain_llm_provider=os.getenv(ENV_RETAIN_LLM_PROVIDER) or None,
      retain_llm_api_key=os.getenv(ENV_RETAIN_LLM_API_KEY) or None,
      retain_llm_model=os.getenv(ENV_RETAIN_LLM_MODEL) or None,
      retain_llm_base_url=os.getenv(ENV_RETAIN_LLM_BASE_URL) or None,
+     retain_llm_max_concurrent=int(os.getenv(ENV_RETAIN_LLM_MAX_CONCURRENT))
+     if os.getenv(ENV_RETAIN_LLM_MAX_CONCURRENT)
+     else None,
+     retain_llm_max_retries=int(os.getenv(ENV_RETAIN_LLM_MAX_RETRIES))
+     if os.getenv(ENV_RETAIN_LLM_MAX_RETRIES)
+     else None,
+     retain_llm_initial_backoff=float(os.getenv(ENV_RETAIN_LLM_INITIAL_BACKOFF))
+     if os.getenv(ENV_RETAIN_LLM_INITIAL_BACKOFF)
+     else None,
+     retain_llm_max_backoff=float(os.getenv(ENV_RETAIN_LLM_MAX_BACKOFF))
+     if os.getenv(ENV_RETAIN_LLM_MAX_BACKOFF)
+     else None,
+     retain_llm_timeout=float(os.getenv(ENV_RETAIN_LLM_TIMEOUT)) if os.getenv(ENV_RETAIN_LLM_TIMEOUT) else None,
      reflect_llm_provider=os.getenv(ENV_REFLECT_LLM_PROVIDER) or None,
      reflect_llm_api_key=os.getenv(ENV_REFLECT_LLM_API_KEY) or None,
      reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL) or None,
      reflect_llm_base_url=os.getenv(ENV_REFLECT_LLM_BASE_URL) or None,
+     reflect_llm_max_concurrent=int(os.getenv(ENV_REFLECT_LLM_MAX_CONCURRENT))
+     if os.getenv(ENV_REFLECT_LLM_MAX_CONCURRENT)
+     else None,
+     reflect_llm_max_retries=int(os.getenv(ENV_REFLECT_LLM_MAX_RETRIES))
+     if os.getenv(ENV_REFLECT_LLM_MAX_RETRIES)
+     else None,
+     reflect_llm_initial_backoff=float(os.getenv(ENV_REFLECT_LLM_INITIAL_BACKOFF))
+     if os.getenv(ENV_REFLECT_LLM_INITIAL_BACKOFF)
+     else None,
+     reflect_llm_max_backoff=float(os.getenv(ENV_REFLECT_LLM_MAX_BACKOFF))
+     if os.getenv(ENV_REFLECT_LLM_MAX_BACKOFF)
+     else None,
+     reflect_llm_timeout=float(os.getenv(ENV_REFLECT_LLM_TIMEOUT))
+     if os.getenv(ENV_REFLECT_LLM_TIMEOUT)
+     else None,
      consolidation_llm_provider=os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER) or None,
      consolidation_llm_api_key=os.getenv(ENV_CONSOLIDATION_LLM_API_KEY) or None,
      consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL) or None,
      consolidation_llm_base_url=os.getenv(ENV_CONSOLIDATION_LLM_BASE_URL) or None,
+     consolidation_llm_max_concurrent=int(os.getenv(ENV_CONSOLIDATION_LLM_MAX_CONCURRENT))
+     if os.getenv(ENV_CONSOLIDATION_LLM_MAX_CONCURRENT)
+     else None,
+     consolidation_llm_max_retries=int(os.getenv(ENV_CONSOLIDATION_LLM_MAX_RETRIES))
+     if os.getenv(ENV_CONSOLIDATION_LLM_MAX_RETRIES)
+     else None,
+     consolidation_llm_initial_backoff=float(os.getenv(ENV_CONSOLIDATION_LLM_INITIAL_BACKOFF))
+     if os.getenv(ENV_CONSOLIDATION_LLM_INITIAL_BACKOFF)
+     else None,
+     consolidation_llm_max_backoff=float(os.getenv(ENV_CONSOLIDATION_LLM_MAX_BACKOFF))
+     if os.getenv(ENV_CONSOLIDATION_LLM_MAX_BACKOFF)
+     else None,
+     consolidation_llm_timeout=float(os.getenv(ENV_CONSOLIDATION_LLM_TIMEOUT))
+     if os.getenv(ENV_CONSOLIDATION_LLM_TIMEOUT)
+     else None,
      # Embeddings
      embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
      embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
+     embeddings_local_force_cpu=os.getenv(
+         ENV_EMBEDDINGS_LOCAL_FORCE_CPU, str(DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU)
+     ).lower()
+     in ("true", "1"),
      embeddings_tei_url=os.getenv(ENV_EMBEDDINGS_TEI_URL),
      embeddings_openai_base_url=os.getenv(ENV_EMBEDDINGS_OPENAI_BASE_URL) or None,
      embeddings_cohere_base_url=os.getenv(ENV_EMBEDDINGS_COHERE_BASE_URL) or None,
      # Reranker
      reranker_provider=os.getenv(ENV_RERANKER_PROVIDER, DEFAULT_RERANKER_PROVIDER),
      reranker_local_model=os.getenv(ENV_RERANKER_LOCAL_MODEL, DEFAULT_RERANKER_LOCAL_MODEL),
+     reranker_local_force_cpu=os.getenv(
+         ENV_RERANKER_LOCAL_FORCE_CPU, str(DEFAULT_RERANKER_LOCAL_FORCE_CPU)
+     ).lower()
+     in ("true", "1"),
+     reranker_local_max_concurrent=int(
+         os.getenv(ENV_RERANKER_LOCAL_MAX_CONCURRENT, str(DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT))
+     ),
      reranker_tei_url=os.getenv(ENV_RERANKER_TEI_URL),
      reranker_tei_batch_size=int(os.getenv(ENV_RERANKER_TEI_BATCH_SIZE, str(DEFAULT_RERANKER_TEI_BATCH_SIZE))),
      reranker_tei_max_concurrent=int(
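Editor's note: every per-operation override in the hunk above follows one shape: cast the env var when it is set and non-empty, otherwise fall back to None so the default LLM settings apply. A hypothetical helper (not in the package) expressing that pattern:

    import os
    from typing import Callable, TypeVar

    T = TypeVar("T")

    def _opt_env(name: str, cast: Callable[[str], T]) -> T | None:
        """Cast the env var if set and non-empty, else return None."""
        raw = os.getenv(name)
        return cast(raw) if raw else None

    # Equivalent to the inline conditionals above, e.g.:
    #   retain_llm_timeout=_opt_env(ENV_RETAIN_LLM_TIMEOUT, float)
    #   reflect_llm_max_retries=_opt_env(ENV_REFLECT_LLM_MAX_RETRIES, int)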
@@ -444,6 +554,9 @@
      consolidation_batch_size=int(
          os.getenv(ENV_CONSOLIDATION_BATCH_SIZE, str(DEFAULT_CONSOLIDATION_BATCH_SIZE))
      ),
+     consolidation_max_tokens=int(
+         os.getenv(ENV_CONSOLIDATION_MAX_TOKENS, str(DEFAULT_CONSOLIDATION_MAX_TOKENS))
+     ),
      # Database migrations
      run_migrations_on_startup=os.getenv(ENV_RUN_MIGRATIONS_ON_STARTUP, "true").lower() == "true",
      # Database connection pool
@@ -515,7 +628,7 @@

  def log_config(self) -> None:
      """Log the current configuration (without sensitive values)."""
-     logger.info(f"Database: {self.database_url}")
+     logger.info(f"Database: {self.database_url} (schema: {self.database_schema})")
      logger.info(f"LLM: provider={self.llm_provider}, model={self.llm_model}")
      if self.retain_llm_provider or self.retain_llm_model:
          retain_provider = self.retain_llm_provider or self.llm_provider
hindsight_api/daemon.py CHANGED
@@ -52,7 +52,10 @@ class IdleTimeoutMiddleware:
  logger.info(f"Idle timeout reached ({self.idle_timeout}s), shutting down daemon")
  # Give a moment for any in-flight requests
  await asyncio.sleep(1)
- os._exit(0)
+ # Send SIGTERM to ourselves to trigger graceful shutdown
+ import signal
+
+ os.kill(os.getpid(), signal.SIGTERM)


  class DaemonLock:
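Editor's note on the hunk above: `os._exit(0)` terminates the process immediately, skipping the ASGI server's shutdown sequence, whereas a self-delivered SIGTERM is picked up by the server's signal handlers (uvicorn installs them by default) so lifespan shutdown and connection draining still run. A standalone sketch of the mechanism:

    # Sketch: a registered handler sees the self-sent SIGTERM;
    # os._exit(0) would bypass it entirely. Unix-only, like the daemon.
    import os
    import signal

    def on_sigterm(signum, frame):
        print("graceful shutdown path runs")  # the ASGI server's role
        raise SystemExit(0)

    signal.signal(signal.SIGTERM, on_sigterm)
    os.kill(os.getpid(), signal.SIGTERM)  # same call the middleware now makes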
@@ -144,10 +144,14 @@ async def run_consolidation_job(
  }

  batch_num = 0
+ last_progress_timings = {}  # Track timings at last progress log
  while True:
      batch_num += 1
      batch_start = time.time()

+     # Snapshot timings at batch start for per-batch calculation
+     batch_start_timings = perf.timings.copy()
+
      # Fetch next batch of unconsolidated memories
      async with pool.acquire() as conn:
          t0 = time.time()
@@ -217,19 +221,44 @@ async def run_consolidation_job(
  elif action == "skipped":
      stats["skipped"] += 1

- # Log progress periodically
+ # Log progress periodically with timing breakdown
  if stats["memories_processed"] % 10 == 0:
+     # Calculate timing deltas since last progress log
+     timing_parts = []
+     for key in ["recall", "llm", "embedding", "db_write"]:
+         if key in perf.timings:
+             delta = perf.timings[key] - last_progress_timings.get(key, 0)
+             timing_parts.append(f"{key}={delta:.2f}s")
+
+     timing_str = f" | {', '.join(timing_parts)}" if timing_parts else ""
      logger.info(
          f"[CONSOLIDATION] bank={bank_id} progress: "
-         f"{stats['memories_processed']}/{total_count} memories processed"
+         f"{stats['memories_processed']}/{total_count} memories processed{timing_str}"
      )

+     # Update last progress snapshot
+     last_progress_timings = perf.timings.copy()
+
  batch_time = time.time() - batch_start
  perf.log(
      f"[2] Batch {batch_num}: {len(memories)} memories in {batch_time:.3f}s "
      f"(avg {batch_time / len(memories):.3f}s/memory)"
  )

+ # Log timing breakdown after each batch (delta from batch start)
+ timing_parts = []
+ for key in ["recall", "llm", "embedding", "db_write"]:
+     if key in perf.timings:
+         delta = perf.timings[key] - batch_start_timings.get(key, 0)
+         timing_parts.append(f"{key}={delta:.3f}s")
+
+ if timing_parts:
+     avg_per_memory = batch_time / len(memories) if memories else 0
+     logger.info(
+         f"[CONSOLIDATION] bank={bank_id} batch {batch_num}/{len(memories)} memories: "
+         f"{', '.join(timing_parts)} | avg={avg_per_memory:.3f}s/memory"
+     )
+
  # Build summary
  perf.log(
      f"[3] Results: {stats['memories_processed']} memories -> "
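Editor's note: both new log sites use the same snapshot-and-diff pattern over the cumulative `perf.timings` dict: copy the dict at an interval boundary, then report `current - snapshot` per key. In isolation, with `perf.timings` stubbed as a plain dict (its class is outside this diff):

    # Sketch of the delta pattern used for both progress and batch logs.
    timings = {"recall": 0.0, "llm": 0.0}  # stands in for perf.timings
    snapshot = timings.copy()              # last_progress_timings / batch_start_timings

    timings["recall"] += 1.5               # work happens, counters accumulate
    timings["llm"] += 3.0

    parts = [
        f"{key}={timings[key] - snapshot.get(key, 0):.2f}s"
        for key in ("recall", "llm", "embedding", "db_write")
        if key in timings
    ]
    print(", ".join(parts))    # recall=1.50s, llm=3.00s
    snapshot = timings.copy()  # reset the baseline for the next interval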
@@ -639,28 +668,27 @@ async def _find_related_observations(
  request_context: "RequestContext",
  ) -> list[dict[str, Any]]:
  """
- Find observations related to the given query using the full recall system.
+ Find observations related to the given query using optimized recall.

  IMPORTANT: We do NOT filter by tags here. Consolidation needs to see ALL
  potentially related observations regardless of scope, so the LLM can
  decide on tag routing (same scope update vs cross-scope create).

- This leverages:
- - Semantic search (embedding similarity)
- - BM25 text search (keyword matching)
- - Entity-based retrieval (shared entities)
- - Graph traversal (connected via entity links)
+ Uses max_tokens to naturally limit observations (no artificial count limit).
+ Includes source memories with dates for LLM context.

  Returns:
-     List of related observations with their tags for LLM tag routing
+     List of related observations with their tags, source memories, and dates
  """
- # Use recall to find related observations
- # NO tags parameter - we want ALL observations regardless of scope
- # Use low max_tokens since we only need observations, not memories
+ # Use recall to find related observations with token budget
+ # max_tokens naturally limits how many observations are returned
+ from ...config import get_config
+
+ config = get_config()
  recall_result = await memory_engine.recall_async(
      bank_id=bank_id,
      query=query,
-     max_tokens=5000,  # Token budget for observations
+     max_tokens=config.consolidation_max_tokens,  # Token budget for observations (configurable)
      fact_type=["observation"],  # Only retrieve observations
      request_context=request_context,
      _quiet=True,  # Suppress logging
@@ -668,43 +696,82 @@
  )

  # If no observations returned, return empty list
- # When fact_type=["observation"], results come back in `results` field
  if not recall_result.results:
      return []

- # Trust recall's relevance filtering - fetch full data for each observation
+ # Batch fetch all observations in a single query (no artificial limit)
+ observation_ids = [uuid.UUID(obs.id) for obs in recall_result.results]
+
+ rows = await conn.fetch(
+     f"""
+     SELECT id, text, proof_count, history, tags, source_memory_ids, created_at, updated_at,
+            occurred_start, occurred_end, mentioned_at
+     FROM {fq_table("memory_units")}
+     WHERE id = ANY($1) AND bank_id = $2 AND fact_type = 'observation'
+     """,
+     observation_ids,
+     bank_id,
+ )
+
+ # Build results list preserving recall order
+ id_to_row = {row["id"]: row for row in rows}
  results = []
- for obs in recall_result.results:
-     # Fetch full observation data from DB to get history, source_memory_ids, tags
-     row = await conn.fetchrow(
-         f"""
-         SELECT id, text, proof_count, history, tags, source_memory_ids, created_at, updated_at
-         FROM {fq_table("memory_units")}
-         WHERE id = $1 AND bank_id = $2 AND fact_type = 'observation'
-         """,
-         uuid.UUID(obs.id),
-         bank_id,
-     )

-     if row:
-         history = row["history"]
-         if isinstance(history, str):
-             history = json.loads(history)
-         elif history is None:
-             history = []
-
-         results.append(
-             {
-                 "id": row["id"],
-                 "text": row["text"],
-                 "proof_count": row["proof_count"] or 1,
-                 "history": history,
-                 "tags": row["tags"] or [],  # Include tags for LLM tag routing
-                 "source_memory_ids": row["source_memory_ids"] or [],
-                 "similarity": 1.0,  # Retrieved via recall so assumed relevant
-             }
+ for obs in recall_result.results:
+     obs_id = uuid.UUID(obs.id)
+     if obs_id not in id_to_row:
+         continue
+
+     row = id_to_row[obs_id]
+     history = row["history"]
+     if isinstance(history, str):
+         history = json.loads(history)
+     elif history is None:
+         history = []
+
+     # Fetch source memories to include their text and dates
+     source_memory_ids = row["source_memory_ids"] or []
+     source_memories = []
+
+     if source_memory_ids:
+         source_rows = await conn.fetch(
+             f"""
+             SELECT text, occurred_start, occurred_end, mentioned_at, event_date
+             FROM {fq_table("memory_units")}
+             WHERE id = ANY($1) AND bank_id = $2
+             ORDER BY created_at ASC
+             LIMIT 5
+             """,
+             source_memory_ids[:5],  # Limit to first 5 source memories for token efficiency
+             bank_id,
          )

+         for src_row in source_rows:
+             source_memories.append(
+                 {
+                     "text": src_row["text"],
+                     "occurred_start": src_row["occurred_start"],
+                     "occurred_end": src_row["occurred_end"],
+                     "mentioned_at": src_row["mentioned_at"],
+                     "event_date": src_row["event_date"],
+                 }
+             )
+
+     results.append(
+         {
+             "id": row["id"],
+             "text": row["text"],
+             "proof_count": row["proof_count"] or 1,
+             "tags": row["tags"] or [],
+             "source_memories": source_memories,
+             "occurred_start": row["occurred_start"],
+             "occurred_end": row["occurred_end"],
+             "mentioned_at": row["mentioned_at"],
+             "created_at": row["created_at"],
+             "updated_at": row["updated_at"],
+         }
+     )
+
  return results

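Editor's note: the rewrite above collapses N sequential `fetchrow` round trips into one `WHERE id = ANY($1)` query, then restores recall's relevance ordering via the `id_to_row` map (SQL gives no ordering guarantee for `ANY`). The reusable shape of that pattern, sketched without asyncpg:

    # Sketch: batch-fetch by id, then reorder rows to match a ranked id list.
    # Rows are plain dicts here; in the daemon they are asyncpg Records.
    def in_rank_order(ranked_ids, rows):
        id_to_row = {row["id"]: row for row in rows}
        # Ids filtered out by the WHERE clause are simply skipped.
        return [id_to_row[i] for i in ranked_ids if i in id_to_row]

    rows = [{"id": 2, "text": "b"}, {"id": 1, "text": "a"}]
    print(in_rank_order([1, 2, 3], rows))
    # [{'id': 1, 'text': 'a'}, {'id': 2, 'text': 'b'}] -- rank kept, 3 dropped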
@@ -732,14 +799,43 @@
  - {"action": "create", "text": "...", "reason": "..."}
  - [] if fact is purely ephemeral (no durable knowledge)
  """
- # Format observations WITH their tags (or "None" if empty)
+ # Format observations as JSON with source memories and dates
  if observations:
-     observations_text = "\n".join(
-         f'- ID: {obs["id"]}, Tags: {json.dumps(obs["tags"])}, Text: "{obs["text"]}" (proof_count: {obs["proof_count"]})'
-         for obs in observations
-     )
+     obs_list = []
+     for obs in observations:
+         obs_data = {
+             "id": str(obs["id"]),
+             "text": obs["text"],
+             "proof_count": obs["proof_count"],
+             "tags": obs["tags"],
+             "created_at": obs["created_at"].isoformat() if obs.get("created_at") else None,
+             "updated_at": obs["updated_at"].isoformat() if obs.get("updated_at") else None,
+         }
+
+         # Include temporal info if available
+         if obs.get("occurred_start"):
+             obs_data["occurred_start"] = obs["occurred_start"].isoformat()
+         if obs.get("occurred_end"):
+             obs_data["occurred_end"] = obs["occurred_end"].isoformat()
+         if obs.get("mentioned_at"):
+             obs_data["mentioned_at"] = obs["mentioned_at"].isoformat()
+
+         # Include source memories (up to 3 for brevity)
+         if obs.get("source_memories"):
+             obs_data["source_memories"] = [
+                 {
+                     "text": sm["text"],
+                     "event_date": sm["event_date"].isoformat() if sm.get("event_date") else None,
+                     "occurred_start": sm["occurred_start"].isoformat() if sm.get("occurred_start") else None,
+                 }
+                 for sm in obs["source_memories"][:3]  # Limit to 3 for token efficiency
+             ]
+
+         obs_list.append(obs_data)
+
+     observations_text = json.dumps(obs_list, indent=2)
  else:
-     observations_text = "None (this is a new topic - create if fact contains durable knowledge)"
+     observations_text = "[]"

  # Only include mission section if mission is set and not the default
  mission_section = ""
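Editor's note: with the formatting above, `observations_text` becomes a pretty-printed JSON array instead of bullet lines. One entry would look roughly like this (all values illustrative, not from the package):

    # Illustrative shape of one serialized observation (made-up values).
    example_entry = {
        "id": "5b3f9c2e-0000-0000-0000-000000000000",
        "text": "User prefers PostgreSQL for new services",
        "proof_count": 3,
        "tags": ["scope:project-x"],
        "created_at": "2024-11-02T09:15:00",
        "updated_at": "2025-01-10T17:40:00",
        "occurred_start": "2024-10-30T00:00:00",
        "source_memories": [
            {
                "text": "Chose Postgres for the billing service",
                "event_date": "2024-10-30T00:00:00",
                "occurred_start": None,
            },
        ],
    }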
@@ -47,23 +47,31 @@ CONSOLIDATION_USER_PROMPT = """Analyze this new fact and consolidate into knowle
  {mission_section}
  NEW FACT: {fact_text}

- EXISTING OBSERVATIONS:
+ EXISTING OBSERVATIONS (JSON array with source memories and dates):
  {observations_text}

- Instructions:
- 1. First, extract the DURABLE KNOWLEDGE from the fact (not ephemeral state like "user is at X")
- 2. Then compare with existing observations:
-    - If an observation covers the same topic: UPDATE it with the new knowledge
-    - If no observation covers the topic: CREATE a new one
+ Each observation includes:
+ - id: unique identifier for updating
+ - text: the observation content
+ - proof_count: number of supporting memories
+ - tags: visibility scope (handled automatically)
+ - created_at/updated_at: when observation was created/modified
+ - occurred_start/occurred_end: temporal range of source facts
+ - source_memories: array of supporting facts with their text and dates

- Output JSON array of actions (ALWAYS an array, even for single action):
+ Instructions:
+ 1. Extract DURABLE KNOWLEDGE from the new fact (not ephemeral state)
+ 2. Review source_memories in existing observations to understand evidence
+ 3. Check dates to detect contradictions or updates
+ 4. Compare with observations:
+    - Same topic → UPDATE with learning_id
+    - New topic → CREATE new observation
+    - Purely ephemeral → return []
+
+ Output JSON array of actions:
  [
- {{"action": "update", "learning_id": "uuid", "text": "updated durable knowledge", "reason": "..."}},
+ {{"action": "update", "learning_id": "uuid-from-observations", "text": "updated knowledge", "reason": "..."}},
  {{"action": "create", "text": "new durable knowledge", "reason": "..."}}
  ]

- If NO consolidation is needed (fact is purely ephemeral with no durable knowledge):
- []
-
- If no observations exist and fact contains durable knowledge:
- [{{"action": "create", "text": "durable knowledge text", "reason": "new topic"}}]"""
+ Return [] if fact contains no durable knowledge."""
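Editor's note: on the consuming side, the prompt's contract is a JSON array of update/create actions, with [] for purely ephemeral facts. A hedged sketch of validating a reply against that contract (the package's own parsing code is not shown in this diff):

    import json

    def parse_actions(raw: str) -> list[dict]:
        """Validate a consolidation reply against the prompt contract."""
        actions = json.loads(raw)
        if not isinstance(actions, list):
            raise ValueError("expected a JSON array of actions")
        for action in actions:
            if action.get("action") == "update" and "learning_id" not in action:
                raise ValueError("update actions must carry a learning_id")
        return actions

    print(parse_actions('[{"action": "create", "text": "fact", "reason": "new topic"}]'))
    print(parse_actions("[]"))  # purely ephemeral fact: nothing to consolidate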