hindsight-api 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/__init__.py +1 -1
- hindsight_api/api/http.py +3 -2
- hindsight_api/config.py +29 -1
- hindsight_api/engine/consolidation/consolidator.py +114 -47
- hindsight_api/engine/consolidation/prompts.py +21 -13
- hindsight_api/engine/cross_encoder.py +50 -24
- hindsight_api/engine/embeddings.py +45 -19
- hindsight_api/engine/memory_engine.py +11 -5
- hindsight_api/engine/reflect/tools.py +1 -1
- hindsight_api/extensions/builtin/tenant.py +8 -5
- hindsight_api/main.py +12 -0
- {hindsight_api-0.4.0.dist-info → hindsight_api-0.4.1.dist-info}/METADATA +1 -1
- {hindsight_api-0.4.0.dist-info → hindsight_api-0.4.1.dist-info}/RECORD +15 -15
- {hindsight_api-0.4.0.dist-info → hindsight_api-0.4.1.dist-info}/WHEEL +0 -0
- {hindsight_api-0.4.0.dist-info → hindsight_api-0.4.1.dist-info}/entry_points.txt +0 -0
hindsight_api/__init__.py
CHANGED
hindsight_api/api/http.py
CHANGED

@@ -1323,7 +1323,7 @@ class VersionResponse(BaseModel):
     model_config = ConfigDict(
         json_schema_extra={
             "example": {
-                "api_version": "
+                "api_version": "0.4.0",
                 "features": {
                     "observations": False,
                     "mcp": True,
@@ -1567,11 +1567,12 @@ def _register_routes(app: FastAPI):
         Returns version info and feature flags that can be used by clients
         to determine which capabilities are available.
        """
+        from hindsight_api import __version__
         from hindsight_api.config import get_config
 
         config = get_config()
         return VersionResponse(
-            api_version=
+            api_version=__version__,
             features=FeaturesInfo(
                 observations=config.enable_observations,
                 mcp=config.mcp_enabled,
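
The hunks above switch the version endpoint from a hard-coded string to the package's own __version__. A minimal client-side sketch, assuming the route is mounted at /version on a local instance (the base URL and path are illustrative; the response fields come from the VersionResponse model above):

    import httpx

    # Hypothetical local base URL; adjust to your deployment.
    resp = httpx.get("http://127.0.0.1:8000/version")
    info = resp.json()
    print(info["api_version"])      # now sourced from hindsight_api.__version__
    print(info["features"]["mcp"])  # feature flags populated from get_config()
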
hindsight_api/config.py
CHANGED

@@ -20,6 +20,7 @@ logger = logging.getLogger(__name__)
 
 # Environment variable names
 ENV_DATABASE_URL = "HINDSIGHT_API_DATABASE_URL"
+ENV_DATABASE_SCHEMA = "HINDSIGHT_API_DATABASE_SCHEMA"
 ENV_LLM_PROVIDER = "HINDSIGHT_API_LLM_PROVIDER"
 ENV_LLM_API_KEY = "HINDSIGHT_API_LLM_API_KEY"
 ENV_LLM_MODEL = "HINDSIGHT_API_LLM_MODEL"
@@ -46,6 +47,7 @@ ENV_CONSOLIDATION_LLM_BASE_URL = "HINDSIGHT_API_CONSOLIDATION_LLM_BASE_URL"
 
 ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
 ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
+ENV_EMBEDDINGS_LOCAL_FORCE_CPU = "HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU"
 ENV_EMBEDDINGS_TEI_URL = "HINDSIGHT_API_EMBEDDINGS_TEI_URL"
 ENV_EMBEDDINGS_OPENAI_API_KEY = "HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY"
 ENV_EMBEDDINGS_OPENAI_MODEL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL"
@@ -65,6 +67,7 @@ ENV_RERANKER_LITELLM_MODEL = "HINDSIGHT_API_RERANKER_LITELLM_MODEL"
 
 ENV_RERANKER_PROVIDER = "HINDSIGHT_API_RERANKER_PROVIDER"
 ENV_RERANKER_LOCAL_MODEL = "HINDSIGHT_API_RERANKER_LOCAL_MODEL"
+ENV_RERANKER_LOCAL_FORCE_CPU = "HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"
 ENV_RERANKER_LOCAL_MAX_CONCURRENT = "HINDSIGHT_API_RERANKER_LOCAL_MAX_CONCURRENT"
 ENV_RERANKER_TEI_URL = "HINDSIGHT_API_RERANKER_TEI_URL"
 ENV_RERANKER_TEI_BATCH_SIZE = "HINDSIGHT_API_RERANKER_TEI_BATCH_SIZE"
@@ -98,6 +101,7 @@ ENV_RETAIN_OBSERVATIONS_ASYNC = "HINDSIGHT_API_RETAIN_OBSERVATIONS_ASYNC"
 # Observations settings (consolidated knowledge from facts)
 ENV_ENABLE_OBSERVATIONS = "HINDSIGHT_API_ENABLE_OBSERVATIONS"
 ENV_CONSOLIDATION_BATCH_SIZE = "HINDSIGHT_API_CONSOLIDATION_BATCH_SIZE"
+ENV_CONSOLIDATION_MAX_TOKENS = "HINDSIGHT_API_CONSOLIDATION_MAX_TOKENS"
 
 # Optimization flags
 ENV_SKIP_LLM_VERIFICATION = "HINDSIGHT_API_SKIP_LLM_VERIFICATION"
@@ -125,6 +129,7 @@ ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"
 
 # Default values
 DEFAULT_DATABASE_URL = "pg0"
+DEFAULT_DATABASE_SCHEMA = "public"
 DEFAULT_LLM_PROVIDER = "openai"
 DEFAULT_LLM_MODEL = "gpt-5-mini"
 DEFAULT_LLM_MAX_CONCURRENT = 32
@@ -132,11 +137,13 @@ DEFAULT_LLM_TIMEOUT = 120.0  # seconds
 
 DEFAULT_EMBEDDINGS_PROVIDER = "local"
 DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
+DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU = False  # Force CPU mode for local embeddings (avoids MPS/XPC issues on macOS)
 DEFAULT_EMBEDDINGS_OPENAI_MODEL = "text-embedding-3-small"
 DEFAULT_EMBEDDING_DIMENSION = 384
 
 DEFAULT_RERANKER_PROVIDER = "local"
 DEFAULT_RERANKER_LOCAL_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
+DEFAULT_RERANKER_LOCAL_FORCE_CPU = False  # Force CPU mode for local reranker (avoids MPS/XPC issues on macOS)
 DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT = 4  # Limit concurrent CPU-bound reranking to prevent thrashing
 DEFAULT_RERANKER_TEI_BATCH_SIZE = 128
 DEFAULT_RERANKER_TEI_MAX_CONCURRENT = 8
@@ -177,6 +184,7 @@ DEFAULT_RETAIN_OBSERVATIONS_ASYNC = False  # Run observation generation async (a
 # Observations defaults (consolidated knowledge from facts)
 DEFAULT_ENABLE_OBSERVATIONS = True  # Observations enabled by default
 DEFAULT_CONSOLIDATION_BATCH_SIZE = 50  # Memories to load per batch (internal memory optimization)
+DEFAULT_CONSOLIDATION_MAX_TOKENS = 1024  # Max tokens for recall when finding related observations
 
 # Database migrations
 DEFAULT_RUN_MIGRATIONS_ON_STARTUP = True
@@ -270,6 +278,7 @@ class HindsightConfig:
 
     # Database
     database_url: str
+    database_schema: str
 
     # LLM (default, used as fallback for per-operation config)
     llm_provider: str
@@ -298,6 +307,7 @@ class HindsightConfig:
     # Embeddings
     embeddings_provider: str
     embeddings_local_model: str
+    embeddings_local_force_cpu: bool
     embeddings_tei_url: str | None
     embeddings_openai_base_url: str | None
     embeddings_cohere_base_url: str | None
@@ -305,6 +315,8 @@ class HindsightConfig:
     # Reranker
     reranker_provider: str
     reranker_local_model: str
+    reranker_local_force_cpu: bool
+    reranker_local_max_concurrent: int
     reranker_tei_url: str | None
     reranker_tei_batch_size: int
     reranker_tei_max_concurrent: int
@@ -336,6 +348,7 @@ class HindsightConfig:
     # Observations settings (consolidated knowledge from facts)
     enable_observations: bool
     consolidation_batch_size: int
+    consolidation_max_tokens: int
 
     # Optimization flags
     skip_llm_verification: bool
@@ -367,6 +380,7 @@ class HindsightConfig:
         return cls(
             # Database
             database_url=os.getenv(ENV_DATABASE_URL, DEFAULT_DATABASE_URL),
+            database_schema=os.getenv(ENV_DATABASE_SCHEMA, DEFAULT_DATABASE_SCHEMA),
             # LLM
             llm_provider=os.getenv(ENV_LLM_PROVIDER, DEFAULT_LLM_PROVIDER),
             llm_api_key=os.getenv(ENV_LLM_API_KEY),
@@ -390,12 +404,23 @@ class HindsightConfig:
             # Embeddings
             embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
             embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
+            embeddings_local_force_cpu=os.getenv(
+                ENV_EMBEDDINGS_LOCAL_FORCE_CPU, str(DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU)
+            ).lower()
+            in ("true", "1"),
             embeddings_tei_url=os.getenv(ENV_EMBEDDINGS_TEI_URL),
             embeddings_openai_base_url=os.getenv(ENV_EMBEDDINGS_OPENAI_BASE_URL) or None,
             embeddings_cohere_base_url=os.getenv(ENV_EMBEDDINGS_COHERE_BASE_URL) or None,
             # Reranker
             reranker_provider=os.getenv(ENV_RERANKER_PROVIDER, DEFAULT_RERANKER_PROVIDER),
             reranker_local_model=os.getenv(ENV_RERANKER_LOCAL_MODEL, DEFAULT_RERANKER_LOCAL_MODEL),
+            reranker_local_force_cpu=os.getenv(
+                ENV_RERANKER_LOCAL_FORCE_CPU, str(DEFAULT_RERANKER_LOCAL_FORCE_CPU)
+            ).lower()
+            in ("true", "1"),
+            reranker_local_max_concurrent=int(
+                os.getenv(ENV_RERANKER_LOCAL_MAX_CONCURRENT, str(DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT))
+            ),
             reranker_tei_url=os.getenv(ENV_RERANKER_TEI_URL),
             reranker_tei_batch_size=int(os.getenv(ENV_RERANKER_TEI_BATCH_SIZE, str(DEFAULT_RERANKER_TEI_BATCH_SIZE))),
             reranker_tei_max_concurrent=int(
@@ -444,6 +469,9 @@ class HindsightConfig:
             consolidation_batch_size=int(
                 os.getenv(ENV_CONSOLIDATION_BATCH_SIZE, str(DEFAULT_CONSOLIDATION_BATCH_SIZE))
             ),
+            consolidation_max_tokens=int(
+                os.getenv(ENV_CONSOLIDATION_MAX_TOKENS, str(DEFAULT_CONSOLIDATION_MAX_TOKENS))
+            ),
             # Database migrations
             run_migrations_on_startup=os.getenv(ENV_RUN_MIGRATIONS_ON_STARTUP, "true").lower() == "true",
             # Database connection pool
@@ -515,7 +543,7 @@ class HindsightConfig:
 
     def log_config(self) -> None:
         """Log the current configuration (without sensitive values)."""
-        logger.info(f"Database: {self.database_url}")
+        logger.info(f"Database: {self.database_url} (schema: {self.database_schema})")
         logger.info(f"LLM: provider={self.llm_provider}, model={self.llm_model}")
         if self.retain_llm_provider or self.retain_llm_model:
             retain_provider = self.retain_llm_provider or self.llm_provider
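
The new force-CPU flags are parsed by lowercasing the raw value and comparing against ("true", "1"), so "TRUE", "true", and "1" all enable them. A standalone sketch of that rule, with env_flag as a hypothetical helper name:

    import os

    def env_flag(name: str, default: bool) -> bool:
        # Mirrors the parsing in the config hunks above: lowercase the
        # value and accept only "true" or "1" as truthy.
        return os.getenv(name, str(default)).lower() in ("true", "1")

    os.environ["HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"] = "TRUE"
    assert env_flag("HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU", False) is True

    # str(False) == "False", which lowercases to "false", so an unset
    # variable correctly falls back to the flag's default of off.
    assert env_flag("HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU", False) is False
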
hindsight_api/engine/consolidation/consolidator.py
CHANGED

@@ -639,28 +639,27 @@ async def _find_related_observations(
     request_context: "RequestContext",
 ) -> list[dict[str, Any]]:
     """
-    Find observations related to the given query using
+    Find observations related to the given query using optimized recall.
 
     IMPORTANT: We do NOT filter by tags here. Consolidation needs to see ALL
     potentially related observations regardless of scope, so the LLM can
     decide on tag routing (same scope update vs cross-scope create).
 
-
-
-    - BM25 text search (keyword matching)
-    - Entity-based retrieval (shared entities)
-    - Graph traversal (connected via entity links)
+    Uses max_tokens to naturally limit observations (no artificial count limit).
+    Includes source memories with dates for LLM context.
 
     Returns:
-        List of related observations with their tags
+        List of related observations with their tags, source memories, and dates
     """
-    # Use recall to find related observations
-    #
-
+    # Use recall to find related observations with token budget
+    # max_tokens naturally limits how many observations are returned
+    from ...config import get_config
+
+    config = get_config()
     recall_result = await memory_engine.recall_async(
         bank_id=bank_id,
         query=query,
-        max_tokens=
+        max_tokens=config.consolidation_max_tokens,  # Token budget for observations (configurable)
         fact_type=["observation"],  # Only retrieve observations
         request_context=request_context,
         _quiet=True,  # Suppress logging
@@ -668,43 +667,82 @@ async def _find_related_observations(
     )
 
     # If no observations returned, return empty list
-    # When fact_type=["observation"], results come back in `results` field
     if not recall_result.results:
         return []
 
-    #
+    # Batch fetch all observations in a single query (no artificial limit)
+    observation_ids = [uuid.UUID(obs.id) for obs in recall_result.results]
+
+    rows = await conn.fetch(
+        f"""
+        SELECT id, text, proof_count, history, tags, source_memory_ids, created_at, updated_at,
+               occurred_start, occurred_end, mentioned_at
+        FROM {fq_table("memory_units")}
+        WHERE id = ANY($1) AND bank_id = $2 AND fact_type = 'observation'
+        """,
+        observation_ids,
+        bank_id,
+    )
+
+    # Build results list preserving recall order
+    id_to_row = {row["id"]: row for row in rows}
     results = []
-    for obs in recall_result.results:
-        # Fetch full observation data from DB to get history, source_memory_ids, tags
-        row = await conn.fetchrow(
-            f"""
-            SELECT id, text, proof_count, history, tags, source_memory_ids, created_at, updated_at
-            FROM {fq_table("memory_units")}
-            WHERE id = $1 AND bank_id = $2 AND fact_type = 'observation'
-            """,
-            uuid.UUID(obs.id),
-            bank_id,
-        )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    for obs in recall_result.results:
+        obs_id = uuid.UUID(obs.id)
+        if obs_id not in id_to_row:
+            continue
+
+        row = id_to_row[obs_id]
+        history = row["history"]
+        if isinstance(history, str):
+            history = json.loads(history)
+        elif history is None:
+            history = []
+
+        # Fetch source memories to include their text and dates
+        source_memory_ids = row["source_memory_ids"] or []
+        source_memories = []
+
+        if source_memory_ids:
+            source_rows = await conn.fetch(
+                f"""
+                SELECT text, occurred_start, occurred_end, mentioned_at, event_date
+                FROM {fq_table("memory_units")}
+                WHERE id = ANY($1) AND bank_id = $2
+                ORDER BY created_at ASC
+                LIMIT 5
+                """,
+                source_memory_ids[:5],  # Limit to first 5 source memories for token efficiency
+                bank_id,
            )
 
+            for src_row in source_rows:
+                source_memories.append(
+                    {
+                        "text": src_row["text"],
+                        "occurred_start": src_row["occurred_start"],
+                        "occurred_end": src_row["occurred_end"],
+                        "mentioned_at": src_row["mentioned_at"],
+                        "event_date": src_row["event_date"],
+                    }
+                )
+
+        results.append(
+            {
+                "id": row["id"],
+                "text": row["text"],
+                "proof_count": row["proof_count"] or 1,
+                "tags": row["tags"] or [],
+                "source_memories": source_memories,
+                "occurred_start": row["occurred_start"],
+                "occurred_end": row["occurred_end"],
+                "mentioned_at": row["mentioned_at"],
+                "created_at": row["created_at"],
+                "updated_at": row["updated_at"],
+            }
+        )
+
     return results
 
 
@@ -732,14 +770,43 @@ async def _consolidate_with_llm(
     - {"action": "create", "text": "...", "reason": "..."}
     - [] if fact is purely ephemeral (no durable knowledge)
     """
-    # Format observations
+    # Format observations as JSON with source memories and dates
    if observations:
-
-
-
-
+        obs_list = []
+        for obs in observations:
+            obs_data = {
+                "id": str(obs["id"]),
+                "text": obs["text"],
+                "proof_count": obs["proof_count"],
+                "tags": obs["tags"],
+                "created_at": obs["created_at"].isoformat() if obs.get("created_at") else None,
+                "updated_at": obs["updated_at"].isoformat() if obs.get("updated_at") else None,
+            }
+
+            # Include temporal info if available
+            if obs.get("occurred_start"):
+                obs_data["occurred_start"] = obs["occurred_start"].isoformat()
+            if obs.get("occurred_end"):
+                obs_data["occurred_end"] = obs["occurred_end"].isoformat()
+            if obs.get("mentioned_at"):
+                obs_data["mentioned_at"] = obs["mentioned_at"].isoformat()
+
+            # Include source memories (up to 3 for brevity)
+            if obs.get("source_memories"):
+                obs_data["source_memories"] = [
+                    {
+                        "text": sm["text"],
+                        "event_date": sm["event_date"].isoformat() if sm.get("event_date") else None,
+                        "occurred_start": sm["occurred_start"].isoformat() if sm.get("occurred_start") else None,
+                    }
+                    for sm in obs["source_memories"][:3]  # Limit to 3 for token efficiency
+                ]
+
+            obs_list.append(obs_data)
+
+        observations_text = json.dumps(obs_list, indent=2)
     else:
-        observations_text = "
+        observations_text = "[]"
 
     # Only include mission section if mission is set and not the default
     mission_section = ""
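
The main change in _find_related_observations replaces a per-result fetchrow() loop with one id = ANY($1) query plus an in-memory reorder. The pattern in isolation, as a sketch with asyncpg (columns simplified; fetch_rows_preserving_order is a made-up helper, not the package's code):

    import uuid

    import asyncpg

    async def fetch_rows_preserving_order(
        conn: asyncpg.Connection, ids: list[uuid.UUID]
    ) -> list[asyncpg.Record]:
        # One round trip instead of N fetchrow() calls.
        rows = await conn.fetch(
            "SELECT id, text FROM memory_units WHERE id = ANY($1)", ids
        )
        # ANY($1) gives no ordering guarantee, so rebuild the caller's
        # order in Python and skip ids that were not found, the same
        # shape as the new loop in the hunk above.
        id_to_row = {row["id"]: row for row in rows}
        return [id_to_row[i] for i in ids if i in id_to_row]
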
hindsight_api/engine/consolidation/prompts.py
CHANGED

@@ -47,23 +47,31 @@ CONSOLIDATION_USER_PROMPT = """Analyze this new fact and consolidate into knowle
 {mission_section}
 NEW FACT: {fact_text}
 
-EXISTING OBSERVATIONS:
+EXISTING OBSERVATIONS (JSON array with source memories and dates):
 {observations_text}
 
-
-
-
-
-
+Each observation includes:
+- id: unique identifier for updating
+- text: the observation content
+- proof_count: number of supporting memories
+- tags: visibility scope (handled automatically)
+- created_at/updated_at: when observation was created/modified
+- occurred_start/occurred_end: temporal range of source facts
+- source_memories: array of supporting facts with their text and dates
 
-
+Instructions:
+1. Extract DURABLE KNOWLEDGE from the new fact (not ephemeral state)
+2. Review source_memories in existing observations to understand evidence
+3. Check dates to detect contradictions or updates
+4. Compare with observations:
+   - Same topic → UPDATE with learning_id
+   - New topic → CREATE new observation
+   - Purely ephemeral → return []
+
+Output JSON array of actions:
 [
-{{"action": "update", "learning_id": "uuid", "text": "updated
+{{"action": "update", "learning_id": "uuid-from-observations", "text": "updated knowledge", "reason": "..."}},
 {{"action": "create", "text": "new durable knowledge", "reason": "..."}}
 ]
 
-
-[]
-
-If no observations exist and fact contains durable knowledge:
-[{{"action": "create", "text": "durable knowledge text", "reason": "new topic"}}]"""
+Return [] if fact contains no durable knowledge."""
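
The rewritten prompt asks for exactly one JSON array of actions and drops the separate no-observations branch. A sketch of consuming that output (the raw string is an illustrative response, not captured from a model):

    import json

    raw = """[
      {"action": "update", "learning_id": "uuid-from-observations", "text": "updated knowledge", "reason": "..."},
      {"action": "create", "text": "new durable knowledge", "reason": "..."}
    ]"""

    for action in json.loads(raw):
        if action["action"] == "update":
            print("update observation", action["learning_id"])
        elif action["action"] == "create":
            print("create observation:", action["text"])
    # An empty array [] means the fact was purely ephemeral.
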
hindsight_api/engine/cross_encoder.py
CHANGED

@@ -20,6 +20,7 @@ from ..config import (
     DEFAULT_RERANKER_FLASHRANK_CACHE_DIR,
     DEFAULT_RERANKER_FLASHRANK_MODEL,
     DEFAULT_RERANKER_LITELLM_MODEL,
+    DEFAULT_RERANKER_LOCAL_FORCE_CPU,
     DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT,
     DEFAULT_RERANKER_LOCAL_MODEL,
     DEFAULT_RERANKER_PROVIDER,
@@ -33,6 +34,7 @@ from ..config import (
     ENV_RERANKER_FLASHRANK_CACHE_DIR,
     ENV_RERANKER_FLASHRANK_MODEL,
     ENV_RERANKER_LITELLM_MODEL,
+    ENV_RERANKER_LOCAL_FORCE_CPU,
     ENV_RERANKER_LOCAL_MAX_CONCURRENT,
     ENV_RERANKER_LOCAL_MODEL,
     ENV_RERANKER_PROVIDER,
@@ -99,7 +101,7 @@ class LocalSTCrossEncoder(CrossEncoderModel):
     _executor: ThreadPoolExecutor | None = None
     _max_concurrent: int = 4  # Limit concurrent CPU-bound reranking calls
 
-    def __init__(self, model_name: str | None = None, max_concurrent: int = 4):
+    def __init__(self, model_name: str | None = None, max_concurrent: int = 4, force_cpu: bool = False):
         """
         Initialize local SentenceTransformers cross-encoder.
 
@@ -108,8 +110,11 @@ class LocalSTCrossEncoder(CrossEncoderModel):
                 Default: cross-encoder/ms-marco-MiniLM-L-6-v2
             max_concurrent: Maximum concurrent reranking calls (default: 2).
                 Higher values may cause CPU thrashing under load.
+            force_cpu: Force CPU mode (avoids MPS/XPC issues on macOS in daemon mode).
+                Default: False
         """
         self.model_name = model_name or DEFAULT_RERANKER_LOCAL_MODEL
+        self.force_cpu = force_cpu
         self._model = None
         LocalSTCrossEncoder._max_concurrent = max_concurrent
 
@@ -139,13 +144,23 @@ class LocalSTCrossEncoder(CrossEncoderModel):
         # after loading, which conflicts with accelerate's device_map handling.
         import torch
 
-        #
-
-
-        if has_gpu:
-            device = None  # Let sentence-transformers auto-detect GPU/MPS
-        else:
+        # Force CPU mode if configured (used in daemon mode to avoid MPS/XPC issues on macOS)
+        if self.force_cpu:
             device = "cpu"
+            logger.info("Reranker: forcing CPU mode (HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU=1)")
+        else:
+            # Check for GPU (CUDA) or Apple Silicon (MPS)
+            # Wrap in try-except to gracefully handle any device detection issues
+            # (e.g., in CI environments or when PyTorch is built without GPU support)
+            device = "cpu"  # Default to CPU
+            try:
+                has_gpu = torch.cuda.is_available() or (
+                    hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+                )
+                if has_gpu:
+                    device = None  # Let sentence-transformers auto-detect GPU/MPS
+            except Exception as e:
+                logger.warning(f"Failed to detect GPU/MPS, falling back to CPU: {e}")
 
         self._model = CrossEncoder(
             self.model_name,
@@ -211,12 +226,19 @@ class LocalSTCrossEncoder(CrossEncoderModel):
         )
 
         # Determine device based on hardware availability
-
-
-        if has_gpu:
-            device = None  # Let sentence-transformers auto-detect GPU/MPS
-        else:
+        if self.force_cpu:
             device = "cpu"
+        else:
+            # Wrap in try-except to gracefully handle any device detection issues
+            device = "cpu"  # Default to CPU
+            try:
+                has_gpu = torch.cuda.is_available() or (
+                    hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+                )
+                if has_gpu:
+                    device = None  # Let sentence-transformers auto-detect GPU/MPS
+            except Exception as e:
+                logger.warning(f"Failed to detect GPU/MPS during reinit, falling back to CPU: {e}")
 
         self._model = CrossEncoder(
             self.model_name,
@@ -873,29 +895,33 @@ class LiteLLMCrossEncoder(CrossEncoderModel):
 
 def create_cross_encoder_from_env() -> CrossEncoderModel:
     """
-    Create a CrossEncoderModel instance based on
+    Create a CrossEncoderModel instance based on configuration.
 
-
+    Reads configuration via get_config() to ensure consistency across the codebase.
 
     Returns:
         Configured CrossEncoderModel instance
     """
-
+    from ..config import get_config
+
+    config = get_config()
+    provider = config.reranker_provider.lower()
 
     if provider == "tei":
-        url =
+        url = config.reranker_tei_url
         if not url:
             raise ValueError(f"{ENV_RERANKER_TEI_URL} is required when {ENV_RERANKER_PROVIDER} is 'tei'")
-
-
-
+        return RemoteTEICrossEncoder(
+            base_url=url,
+            batch_size=config.reranker_tei_batch_size,
+            max_concurrent=config.reranker_tei_max_concurrent,
+        )
     elif provider == "local":
-
-
-
-
+        return LocalSTCrossEncoder(
+            model_name=config.reranker_local_model,
+            max_concurrent=config.reranker_local_max_concurrent,
+            force_cpu=config.reranker_local_force_cpu,
         )
-        return LocalSTCrossEncoder(model_name=model_name, max_concurrent=max_concurrent)
     elif provider == "cohere":
         api_key = os.environ.get(ENV_COHERE_API_KEY)
         if not api_key:
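
Both the initial load and the reinitialization path now share the same device-selection shape: force_cpu wins outright, otherwise CUDA/MPS detection runs inside try/except and any failure degrades to CPU. The logic as a standalone sketch (the real code inlines this rather than calling a helper):

    import logging

    import torch

    logger = logging.getLogger(__name__)

    def pick_device(force_cpu: bool) -> str | None:
        # "cpu" pins the model to CPU; None lets sentence-transformers
        # auto-detect CUDA or MPS, matching the hunks above.
        if force_cpu:
            return "cpu"
        try:
            has_gpu = torch.cuda.is_available() or (
                hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
            )
            return None if has_gpu else "cpu"
        except Exception as e:  # e.g. PyTorch built without GPU support
            logger.warning(f"Failed to detect GPU/MPS, falling back to CPU: {e}")
            return "cpu"
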
hindsight_api/engine/embeddings.py
CHANGED

@@ -18,6 +18,7 @@ import httpx
 from ..config import (
     DEFAULT_EMBEDDINGS_COHERE_MODEL,
     DEFAULT_EMBEDDINGS_LITELLM_MODEL,
+    DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU,
     DEFAULT_EMBEDDINGS_LOCAL_MODEL,
     DEFAULT_EMBEDDINGS_OPENAI_MODEL,
     DEFAULT_EMBEDDINGS_PROVIDER,
@@ -26,6 +27,7 @@ from ..config import (
     ENV_EMBEDDINGS_COHERE_BASE_URL,
     ENV_EMBEDDINGS_COHERE_MODEL,
     ENV_EMBEDDINGS_LITELLM_MODEL,
+    ENV_EMBEDDINGS_LOCAL_FORCE_CPU,
     ENV_EMBEDDINGS_LOCAL_MODEL,
     ENV_EMBEDDINGS_OPENAI_API_KEY,
     ENV_EMBEDDINGS_OPENAI_BASE_URL,
@@ -92,15 +94,18 @@ class LocalSTEmbeddings(Embeddings):
     The embedding dimension is auto-detected from the model.
     """
 
-    def __init__(self, model_name: str | None = None):
+    def __init__(self, model_name: str | None = None, force_cpu: bool = False):
         """
         Initialize local SentenceTransformers embeddings.
 
         Args:
             model_name: Name of the SentenceTransformer model to use.
                 Default: BAAI/bge-small-en-v1.5
+            force_cpu: Force CPU mode (avoids MPS/XPC issues on macOS in daemon mode).
+                Default: False
         """
         self.model_name = model_name or DEFAULT_EMBEDDINGS_LOCAL_MODEL
+        self.force_cpu = force_cpu
         self._model = None
         self._dimension: int | None = None
 
@@ -134,13 +139,23 @@ class LocalSTEmbeddings(Embeddings):
         # which can cause issues when accelerate is installed but no GPU is available.
         import torch
 
-        #
-
-
-        if has_gpu:
-            device = None  # Let sentence-transformers auto-detect GPU/MPS
-        else:
+        # Force CPU mode if configured (used in daemon mode to avoid MPS/XPC issues on macOS)
+        if self.force_cpu:
             device = "cpu"
+            logger.info("Embeddings: forcing CPU mode")
+        else:
+            # Check for GPU (CUDA) or Apple Silicon (MPS)
+            # Wrap in try-except to gracefully handle any device detection issues
+            # (e.g., in CI environments or when PyTorch is built without GPU support)
+            device = "cpu"  # Default to CPU
+            try:
+                has_gpu = torch.cuda.is_available() or (
+                    hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+                )
+                if has_gpu:
+                    device = None  # Let sentence-transformers auto-detect GPU/MPS
+            except Exception as e:
+                logger.warning(f"Failed to detect GPU/MPS, falling back to CPU: {e}")
 
         self._model = SentenceTransformer(
             self.model_name,
@@ -199,12 +214,19 @@ class LocalSTEmbeddings(Embeddings):
         )
 
         # Determine device based on hardware availability
-
-
-        if has_gpu:
-            device = None  # Let sentence-transformers auto-detect GPU/MPS
-        else:
+        if self.force_cpu:
             device = "cpu"
+        else:
+            # Wrap in try-except to gracefully handle any device detection issues
+            device = "cpu"  # Default to CPU
+            try:
+                has_gpu = torch.cuda.is_available() or (
+                    hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+                )
+                if has_gpu:
+                    device = None  # Let sentence-transformers auto-detect GPU/MPS
+            except Exception as e:
+                logger.warning(f"Failed to detect GPU/MPS during reinit, falling back to CPU: {e}")
 
         self._model = SentenceTransformer(
             self.model_name,
@@ -770,24 +792,28 @@ class LiteLLMEmbeddings(Embeddings):
 
 def create_embeddings_from_env() -> Embeddings:
     """
-    Create an Embeddings instance based on
+    Create an Embeddings instance based on configuration.
 
-
+    Reads configuration via get_config() to ensure consistency across the codebase.
 
     Returns:
         Configured Embeddings instance
     """
-
+    from ..config import get_config
+
+    config = get_config()
+    provider = config.embeddings_provider.lower()
 
     if provider == "tei":
-        url =
+        url = config.embeddings_tei_url
         if not url:
             raise ValueError(f"{ENV_EMBEDDINGS_TEI_URL} is required when {ENV_EMBEDDINGS_PROVIDER} is 'tei'")
         return RemoteTEIEmbeddings(base_url=url)
     elif provider == "local":
-
-
-
+        return LocalSTEmbeddings(
+            model_name=config.embeddings_local_model,
+            force_cpu=config.embeddings_local_force_cpu,
+        )
     elif provider == "openai":
         # Use dedicated embeddings API key, or fall back to LLM API key
         api_key = os.environ.get(ENV_EMBEDDINGS_OPENAI_API_KEY) or os.environ.get(ENV_LLM_API_KEY)
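
create_embeddings_from_env() now pulls its settings from get_config() instead of reading os.environ directly. For the "local" provider the factory is roughly equivalent to this manual wiring (module paths taken from the package RECORD; treat as a sketch, not the factory itself):

    from hindsight_api.config import get_config
    from hindsight_api.engine.embeddings import LocalSTEmbeddings

    config = get_config()
    if config.embeddings_provider.lower() == "local":
        embeddings = LocalSTEmbeddings(
            model_name=config.embeddings_local_model,     # default: BAAI/bge-small-en-v1.5
            force_cpu=config.embeddings_local_force_cpu,  # new in 0.4.1
        )
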
hindsight_api/engine/memory_engine.py
CHANGED

@@ -23,12 +23,17 @@ from ..metrics import get_metrics_collector
 from .db_budget import budgeted_operation
 
 # Context variable for current schema (async-safe, per-task isolation)
-
+# Note: default is None, actual default comes from config via get_current_schema()
+_current_schema: contextvars.ContextVar[str | None] = contextvars.ContextVar("current_schema", default=None)
 
 
 def get_current_schema() -> str:
-    """Get the current schema from context (default
-
+    """Get the current schema from context (falls back to config default)."""
+    schema = _current_schema.get()
+    if schema is None:
+        # Fall back to configured default schema
+        return get_config().database_schema
+    return schema
 
 
 def fq_table(table_name: str) -> str:
@@ -881,11 +886,12 @@ class MemoryEngine(MemoryEngineInterface):
         if not self.db_url:
             raise ValueError("Database URL is required for migrations")
         logger.info("Running database migrations...")
-
+        # Use configured database schema for migrations (defaults to "public")
+        run_migrations(self.db_url, schema=get_config().database_schema)
 
         # Ensure embedding column dimension matches the model's dimension
         # This is done after migrations and after embeddings.initialize()
-        ensure_embedding_dimension(self.db_url, self.embeddings.dimension)
+        ensure_embedding_dimension(self.db_url, self.embeddings.dimension, schema=get_config().database_schema)
 
         logger.info(f"Connecting to PostgreSQL at {self.db_url}")
 
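
The schema ContextVar now defaults to None so "nothing set in this task" is distinguishable from an explicit choice, with the configured default applied at read time. The pattern in isolation (the get_config().database_schema lookup is replaced by a plain parameter so the sketch runs standalone):

    import contextvars

    _current_schema: contextvars.ContextVar[str | None] = contextvars.ContextVar(
        "current_schema", default=None
    )

    def get_current_schema(default_schema: str = "public") -> str:
        # None means no schema was set for this task: fall back to the
        # default, mirroring the config fallback in the hunk above.
        schema = _current_schema.get()
        return default_schema if schema is None else schema

    assert get_current_schema() == "public"
    token = _current_schema.set("tenant_a")
    assert get_current_schema() == "tenant_a"
    _current_schema.reset(token)
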
hindsight_api/extensions/builtin/tenant.py
CHANGED

@@ -1,5 +1,6 @@
 """Built-in tenant extension implementations."""
 
+from hindsight_api.config import get_config
 from hindsight_api.extensions.tenant import AuthenticationError, Tenant, TenantContext, TenantExtension
 from hindsight_api.models import RequestContext
 
@@ -10,11 +11,13 @@ class ApiKeyTenantExtension(TenantExtension):
 
     This is a simple implementation that:
     1. Validates the API key matches HINDSIGHT_API_TENANT_API_KEY
-    2. Returns
+    2. Returns the configured schema (HINDSIGHT_API_DATABASE_SCHEMA, default 'public')
+       for all authenticated requests
 
     Configuration:
         HINDSIGHT_API_TENANT_EXTENSION=hindsight_api.extensions.builtin.tenant:ApiKeyTenantExtension
         HINDSIGHT_API_TENANT_API_KEY=your-secret-key
+        HINDSIGHT_API_DATABASE_SCHEMA=your-schema (optional, defaults to 'public')
 
     For multi-tenant setups with separate schemas per tenant, implement a custom
     TenantExtension that looks up the schema based on the API key or token claims.
@@ -27,11 +30,11 @@ class ApiKeyTenantExtension(TenantExtension):
             raise ValueError("HINDSIGHT_API_TENANT_API_KEY is required when using ApiKeyTenantExtension")
 
     async def authenticate(self, context: RequestContext) -> TenantContext:
-        """Validate API key and return
+        """Validate API key and return configured schema context."""
         if context.api_key != self.expected_api_key:
             raise AuthenticationError("Invalid API key")
-        return TenantContext(schema_name=
+        return TenantContext(schema_name=get_config().database_schema)
 
     async def list_tenants(self) -> list[Tenant]:
-        """Return
-        return [Tenant(schema=
+        """Return configured schema for single-tenant setup."""
+        return [Tenant(schema=get_config().database_schema)]
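
As the updated docstring says, multi-tenant deployments should map credentials to schemas themselves. A hypothetical variant along those lines (class names and method signatures come from the diff; the key-to-schema table is made up):

    from hindsight_api.extensions.tenant import (
        AuthenticationError,
        Tenant,
        TenantContext,
        TenantExtension,
    )
    from hindsight_api.models import RequestContext

    class SchemaPerKeyTenantExtension(TenantExtension):
        """Hypothetical extension mapping each API key to its own schema."""

        def __init__(self):
            # A real implementation might read this from a database or
            # derive it from token claims.
            self.key_to_schema = {"key-a": "tenant_a", "key-b": "tenant_b"}

        async def authenticate(self, context: RequestContext) -> TenantContext:
            schema = self.key_to_schema.get(context.api_key)
            if schema is None:
                raise AuthenticationError("Invalid API key")
            return TenantContext(schema_name=schema)

        async def list_tenants(self) -> list[Tenant]:
            return [Tenant(schema=s) for s in self.key_to_schema.values()]
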
hindsight_api/main.py
CHANGED

@@ -140,6 +140,13 @@ def main():
         args.port = DEFAULT_DAEMON_PORT
         args.host = "127.0.0.1"  # Only bind to localhost for security
 
+        # Force CPU mode for daemon to avoid macOS MPS/XPC issues
+        # MPS (Metal Performance Shaders) has unstable XPC connections in background processes
+        # that can cause assertion failures and process crashes at the C++ level
+        # (which Python exception handlers cannot catch)
+        os.environ["HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU"] = "1"
+        os.environ["HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"] = "1"
+
         # Check if another daemon is already running
         daemon_lock = DaemonLock()
         if not daemon_lock.acquire():
@@ -170,6 +177,7 @@ def main():
     if args.log_level != config.log_level:
         config = HindsightConfig(
             database_url=config.database_url,
+            database_schema=config.database_schema,
             llm_provider=config.llm_provider,
             llm_api_key=config.llm_api_key,
             llm_model=config.llm_model,
@@ -190,11 +198,14 @@ def main():
             consolidation_llm_base_url=config.consolidation_llm_base_url,
             embeddings_provider=config.embeddings_provider,
             embeddings_local_model=config.embeddings_local_model,
+            embeddings_local_force_cpu=config.embeddings_local_force_cpu,
             embeddings_tei_url=config.embeddings_tei_url,
             embeddings_openai_base_url=config.embeddings_openai_base_url,
             embeddings_cohere_base_url=config.embeddings_cohere_base_url,
             reranker_provider=config.reranker_provider,
             reranker_local_model=config.reranker_local_model,
+            reranker_local_force_cpu=config.reranker_local_force_cpu,
+            reranker_local_max_concurrent=config.reranker_local_max_concurrent,
             reranker_tei_url=config.reranker_tei_url,
             reranker_tei_batch_size=config.reranker_tei_batch_size,
             reranker_tei_max_concurrent=config.reranker_tei_max_concurrent,
@@ -217,6 +228,7 @@ def main():
             retain_observations_async=config.retain_observations_async,
             enable_observations=config.enable_observations,
             consolidation_batch_size=config.consolidation_batch_size,
+            consolidation_max_tokens=config.consolidation_max_tokens,
             skip_llm_verification=config.skip_llm_verification,
             lazy_reranker=config.lazy_reranker,
             run_migrations_on_startup=config.run_migrations_on_startup,
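
Daemon mode exports the two force-CPU variables before the configuration is first read, so neither local model ever touches MPS. The ordering is the whole point; a sketch (assuming get_config() has not been called earlier in the process, since the config is populated from the environment):

    import os

    # Same variables the daemon path exports in the hunk above.
    os.environ["HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU"] = "1"
    os.environ["HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"] = "1"

    from hindsight_api.config import get_config

    config = get_config()
    assert config.embeddings_local_force_cpu
    assert config.reranker_local_force_cpu
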
{hindsight_api-0.4.0.dist-info → hindsight_api-0.4.1.dist-info}/RECORD
CHANGED

@@ -1,8 +1,8 @@
-hindsight_api/__init__.py,sha256=
+hindsight_api/__init__.py,sha256=y8um49GprBg-BgGNjmSIRwE5PFOQWNmyio0m4xAiSHo,1197
 hindsight_api/banner.py,sha256=BXn-jhkXe4xi-YV4JeuaVvjYhTMs96O43XoOMv4Cd28,4591
-hindsight_api/config.py,sha256=
+hindsight_api/config.py,sha256=PTnOLxdq7S4xBBuUA1ADLMKXylhphUQf7-DjSwZg7l0,26497
 hindsight_api/daemon.py,sha256=3CKcO_ENQ57dIWrTsmYUj-V4zvoAB1toNtVh3EVkg-c,5982
-hindsight_api/main.py,sha256=
+hindsight_api/main.py,sha256=7poaTkS1U4E0SEKMqJ1m-L_IQKIikb-bIcay-btqXU8,14178
 hindsight_api/mcp_local.py,sha256=fJnCxMBc79GlBZrma94Ux6g-GVuh-W66194cqQdkKJQ,5613
 hindsight_api/mcp_tools.py,sha256=KGzgDeRoChwgt3HB-OoUHcWgHz6ELequLIkw6u7kkyo,19669
 hindsight_api/metrics.py,sha256=go3X7wyFAPkc55HFvu7esiaJXDrUsrSrC8Pq5NjcqU0,20692
@@ -39,24 +39,24 @@ hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations
 hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py,sha256=uvil81f-4ag2dIxBXUGKZ5vxkqdNQRpxCWj_iVih09w,1355
 hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py,sha256=Mw68uW8PK-SaHcYcqb41vWI0R22t70SSasNS2Myeoec,1656
 hindsight_api/api/__init__.py,sha256=npF0AAy8WJhHF5a9ehkNn9_iYLk7RQOk2gdkdFb49Hk,3840
-hindsight_api/api/http.py,sha256=
+hindsight_api/api/http.py,sha256=5YFLGWDILApMf4lLxpc72S9EtnOIOvuu8c3K124THPc,133363
 hindsight_api/api/mcp.py,sha256=zV0TmkxKEqwhLIfNAdezYgsZ1PF9Lo8j5_lD73ULpKU,6707
 hindsight_api/engine/__init__.py,sha256=-BwaSwG9fTT_BBO0c_2MBkxG6-tGdclSzIqsgHw4cnw,1633
-hindsight_api/engine/cross_encoder.py,sha256=
+hindsight_api/engine/cross_encoder.py,sha256=rA-iFXO-hXOx6BPCUi_Q6p-wm93eE8Bd6cuivFkeKBY,36029
 hindsight_api/engine/db_budget.py,sha256=1OmZiuszpuEaYz355QlOqwaupXPd9FrnbyENsFboBkg,8642
 hindsight_api/engine/db_utils.py,sha256=Fq1pXETt8ZPhkWYjrcGbgL6glrwmCGWh3_lYJgHqQPo,3067
-hindsight_api/engine/embeddings.py,sha256=
+hindsight_api/engine/embeddings.py,sha256=FAFf7mb7Woz0BoJmBF_m1y3FZt8Ty0yw3ZYaYSHrtMg,30736
 hindsight_api/engine/entity_resolver.py,sha256=qVvWJHnbGEfh0iUFtc1dbM3IUNwPMsQsmg2rMgiX2DY,23794
 hindsight_api/engine/interface.py,sha256=rldxkBmp_bqEeTBD713uZeXvrqJB9Ix1L62gazlNEi0,16899
 hindsight_api/engine/llm_wrapper.py,sha256=Mh38zSlNGhsbN0f2VA1JGZ52HRab_ndcKqvEhyajgK0,68084
-hindsight_api/engine/memory_engine.py,sha256=
+hindsight_api/engine/memory_engine.py,sha256=lYWrQYzHib0UPiGNFxe_5QXxkJv14DjiG93SZ1sHo7c,231906
 hindsight_api/engine/query_analyzer.py,sha256=7APe0MjBcUxjivcMlM03PmMk_w5FjWvlEe20yAJlHlc,19741
 hindsight_api/engine/response_models.py,sha256=1fNAFPztlmYfOaoRfwYyrhzdPBO9UL8QHFNXW6Lmjgg,16322
 hindsight_api/engine/task_backend.py,sha256=zDH24tTwIH_59eFpQzepv0KkZXOIVMpmDkrg1Y5khDA,8172
 hindsight_api/engine/utils.py,sha256=OtEFDViKcCpFmKN3Qir8YV4zp0kv7iaREcgDXCkwShw,2089
 hindsight_api/engine/consolidation/__init__.py,sha256=qEUPy0R7akNoAooQL1TAt2rVasjvnXTcNzh2zpN0flc,160
-hindsight_api/engine/consolidation/consolidator.py,sha256=
-hindsight_api/engine/consolidation/prompts.py,sha256=
+hindsight_api/engine/consolidation/consolidator.py,sha256=E2wEsSnHVFEFEahq51QCkp4zGZW-LZxMUxgZh49cEt8,33037
+hindsight_api/engine/consolidation/prompts.py,sha256=UgJJvXeG7bH0h-N0AWlUsmWoYxfJY2gIP_3f9xjCvSc,3422
 hindsight_api/engine/directives/__init__.py,sha256=5ZxaRqZVyJckbGElaI2DMRMBtnj-qYkxRKdnOHBwovA,118
 hindsight_api/engine/directives/models.py,sha256=PKxvmhW1-fjBITAOBu7RKX5Lj61c2jdsTaX8ADelKag,1523
 hindsight_api/engine/mental_models/__init__.py,sha256=TU6dSPyIsevFDgY6PLYctDsk5K4SA4pFSQnmQvbdRlA,488
@@ -66,7 +66,7 @@ hindsight_api/engine/reflect/agent.py,sha256=mr0rUrwdnDISt9iyuspI_ZhL4qfyLTWAzJO
 hindsight_api/engine/reflect/models.py,sha256=ZnMCi4sta5bSVGRRNatTA2jNSun59mWEVVq6Dkmjq1Q,5185
 hindsight_api/engine/reflect/observations.py,sha256=TMivY5ujrJAOhG3OFFTGEuwbW27AOjyD5l0JlDfbyJM,6777
 hindsight_api/engine/reflect/prompts.py,sha256=AewEMBJ_fk0pAVtWQagPMY_0lR9mHNJQ7kF4I5ruWmg,18442
-hindsight_api/engine/reflect/tools.py,sha256=
+hindsight_api/engine/reflect/tools.py,sha256=tD5l3ODhtQ3bcD5dvbsRipWz902H7tz4M_TDEZhpazU,14550
 hindsight_api/engine/reflect/tools_schema.py,sha256=cdRmRwge5aHlVY19Y0Tlp5zEcFCnvxUc2GAXmwaNcMU,9944
 hindsight_api/engine/retain/__init__.py,sha256=t6q3-_kf4iYTl9j2PVB6laqMSs6UuPeXBSYMW6HT1sA,1152
 hindsight_api/engine/retain/bank_utils.py,sha256=LsFiB5rqyI1deL4rePAmPdmxMBb4paDA8UqMvY2Y40g,7696
@@ -102,11 +102,11 @@ hindsight_api/extensions/loader.py,sha256=UwGM0XH7zHGng_xfHUY0VbOQemj9DmjuDaMst1
 hindsight_api/extensions/operation_validator.py,sha256=ciXvTtlX4c5VcLze5cVbuaD6B-10IxnfgnNhbY8LGLc,13360
 hindsight_api/extensions/tenant.py,sha256=0LraksQ1gzsOYLEGrx2q2F0or596Ywfo_MqD1FJMNRM,2617
 hindsight_api/extensions/builtin/__init__.py,sha256=hLx2oFYZ1JtZhTWfab6AYcR02SWP2gIdbEqnZezT8ek,526
-hindsight_api/extensions/builtin/tenant.py,sha256=
+hindsight_api/extensions/builtin/tenant.py,sha256=R7jfNR41deGWqQB5P8Qk5njy1bZgvemcTpkXDRiAZBA,1835
 hindsight_api/worker/__init__.py,sha256=hzpMLvOfgL2KKrrik_9ouvEzCdvJSrH-pj5UdFK63J0,256
 hindsight_api/worker/main.py,sha256=1OrQdHL-6u-311W0XMAoLHOXCu8MOETiQkR0TQ23qh8,9547
 hindsight_api/worker/poller.py,sha256=l-y8xpekKZ7zcGo83osOsbFd_tBi49LqrAJsN-mxiMY,19306
-hindsight_api-0.4.
-hindsight_api-0.4.
-hindsight_api-0.4.
-hindsight_api-0.4.
+hindsight_api-0.4.1.dist-info/METADATA,sha256=7qQlHBih3InJcpEZv3UAWzBkhhgQ0DgLKayw-hmp9VI,5760
+hindsight_api-0.4.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+hindsight_api-0.4.1.dist-info/entry_points.txt,sha256=1-mxPbRGL_Byf9ZrHYkPW-TEgLYFcwCiSFCxOgI_3vM,206
+hindsight_api-0.4.1.dist-info/RECORD,,

{hindsight_api-0.4.0.dist-info → hindsight_api-0.4.1.dist-info}/WHEEL
File without changes

{hindsight_api-0.4.0.dist-info → hindsight_api-0.4.1.dist-info}/entry_points.txt
File without changes