hindsight-api 0.4.0__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/.gitignore +4 -1
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/PKG-INFO +1 -1
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/__init__.py +1 -1
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/api/http.py +3 -2
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/config.py +114 -1
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/daemon.py +4 -1
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/consolidation/consolidator.py +145 -49
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/consolidation/prompts.py +21 -13
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/cross_encoder.py +43 -109
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/embeddings.py +35 -99
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/memory_engine.py +11 -5
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/tools.py +1 -1
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/retain/fact_extraction.py +16 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/extensions/builtin/tenant.py +8 -5
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/main.py +26 -2
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/pyproject.toml +1 -1
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/README.md +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/admin/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/admin/cli.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/README +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/env.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/script.py.mako +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/rename_personality_to_disposition.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/api/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/api/mcp.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/banner.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/consolidation/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/db_budget.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/db_utils.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/directives/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/directives/models.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/entity_resolver.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/interface.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/llm_wrapper.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/mental_models/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/mental_models/models.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/query_analyzer.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/agent.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/models.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/observations.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/prompts.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/tools_schema.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/response_models.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/retain/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/retain/bank_utils.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/retain/chunk_storage.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/retain/deduplication.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/retain/embedding_processing.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/retain/embedding_utils.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/retain/entity_processing.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/retain/fact_storage.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/retain/link_creation.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/retain/link_utils.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/retain/orchestrator.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/retain/types.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/search/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/search/fusion.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/search/graph_retrieval.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/search/link_expansion_retrieval.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/search/mpfp_retrieval.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/search/reranking.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/search/retrieval.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/search/tags.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/search/temporal_extraction.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/search/think_utils.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/search/trace.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/search/tracer.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/search/types.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/task_backend.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/utils.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/extensions/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/extensions/base.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/extensions/builtin/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/extensions/context.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/extensions/http.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/extensions/loader.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/extensions/operation_validator.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/extensions/tenant.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/mcp_local.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/mcp_tools.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/metrics.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/migrations.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/models.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/pg0.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/server.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/worker/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/worker/main.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/worker/poller.py +0 -0
|
@@ -45,9 +45,12 @@ hindsight-docs/static/llms-full.txt
|
|
|
45
45
|
|
|
46
46
|
hindsight-dev/benchmarks/locomo/results/
|
|
47
47
|
hindsight-dev/benchmarks/longmemeval/results/
|
|
48
|
+
hindsight-dev/benchmarks/consolidation/results/
|
|
49
|
+
benchmarks/results/
|
|
48
50
|
hindsight-cli/target
|
|
49
51
|
hindsight-clients/rust/target
|
|
50
52
|
.claude
|
|
51
53
|
whats-next.md
|
|
52
54
|
TASK.md
|
|
53
|
-
|
|
55
|
+
# Changelog is now tracked in hindsight-docs/src/pages/changelog.md
|
|
56
|
+
# CHANGELOG.md
|
|
@@ -1323,7 +1323,7 @@ class VersionResponse(BaseModel):
|
|
|
1323
1323
|
model_config = ConfigDict(
|
|
1324
1324
|
json_schema_extra={
|
|
1325
1325
|
"example": {
|
|
1326
|
-
"api_version": "
|
|
1326
|
+
"api_version": "0.4.0",
|
|
1327
1327
|
"features": {
|
|
1328
1328
|
"observations": False,
|
|
1329
1329
|
"mcp": True,
|
|
@@ -1567,11 +1567,12 @@ def _register_routes(app: FastAPI):
|
|
|
1567
1567
|
Returns version info and feature flags that can be used by clients
|
|
1568
1568
|
to determine which capabilities are available.
|
|
1569
1569
|
"""
|
|
1570
|
+
from hindsight_api import __version__
|
|
1570
1571
|
from hindsight_api.config import get_config
|
|
1571
1572
|
|
|
1572
1573
|
config = get_config()
|
|
1573
1574
|
return VersionResponse(
|
|
1574
|
-
api_version=
|
|
1575
|
+
api_version=__version__,
|
|
1575
1576
|
features=FeaturesInfo(
|
|
1576
1577
|
observations=config.enable_observations,
|
|
1577
1578
|
mcp=config.mcp_enabled,
|
|
@@ -20,11 +20,15 @@ logger = logging.getLogger(__name__)
|
|
|
20
20
|
|
|
21
21
|
# Environment variable names
|
|
22
22
|
ENV_DATABASE_URL = "HINDSIGHT_API_DATABASE_URL"
|
|
23
|
+
ENV_DATABASE_SCHEMA = "HINDSIGHT_API_DATABASE_SCHEMA"
|
|
23
24
|
ENV_LLM_PROVIDER = "HINDSIGHT_API_LLM_PROVIDER"
|
|
24
25
|
ENV_LLM_API_KEY = "HINDSIGHT_API_LLM_API_KEY"
|
|
25
26
|
ENV_LLM_MODEL = "HINDSIGHT_API_LLM_MODEL"
|
|
26
27
|
ENV_LLM_BASE_URL = "HINDSIGHT_API_LLM_BASE_URL"
|
|
27
28
|
ENV_LLM_MAX_CONCURRENT = "HINDSIGHT_API_LLM_MAX_CONCURRENT"
|
|
29
|
+
ENV_LLM_MAX_RETRIES = "HINDSIGHT_API_LLM_MAX_RETRIES"
|
|
30
|
+
ENV_LLM_INITIAL_BACKOFF = "HINDSIGHT_API_LLM_INITIAL_BACKOFF"
|
|
31
|
+
ENV_LLM_MAX_BACKOFF = "HINDSIGHT_API_LLM_MAX_BACKOFF"
|
|
28
32
|
ENV_LLM_TIMEOUT = "HINDSIGHT_API_LLM_TIMEOUT"
|
|
29
33
|
ENV_LLM_GROQ_SERVICE_TIER = "HINDSIGHT_API_LLM_GROQ_SERVICE_TIER"
|
|
30
34
|
|
|
@@ -33,19 +37,35 @@ ENV_RETAIN_LLM_PROVIDER = "HINDSIGHT_API_RETAIN_LLM_PROVIDER"
|
|
|
33
37
|
ENV_RETAIN_LLM_API_KEY = "HINDSIGHT_API_RETAIN_LLM_API_KEY"
|
|
34
38
|
ENV_RETAIN_LLM_MODEL = "HINDSIGHT_API_RETAIN_LLM_MODEL"
|
|
35
39
|
ENV_RETAIN_LLM_BASE_URL = "HINDSIGHT_API_RETAIN_LLM_BASE_URL"
|
|
40
|
+
ENV_RETAIN_LLM_MAX_CONCURRENT = "HINDSIGHT_API_RETAIN_LLM_MAX_CONCURRENT"
|
|
41
|
+
ENV_RETAIN_LLM_MAX_RETRIES = "HINDSIGHT_API_RETAIN_LLM_MAX_RETRIES"
|
|
42
|
+
ENV_RETAIN_LLM_INITIAL_BACKOFF = "HINDSIGHT_API_RETAIN_LLM_INITIAL_BACKOFF"
|
|
43
|
+
ENV_RETAIN_LLM_MAX_BACKOFF = "HINDSIGHT_API_RETAIN_LLM_MAX_BACKOFF"
|
|
44
|
+
ENV_RETAIN_LLM_TIMEOUT = "HINDSIGHT_API_RETAIN_LLM_TIMEOUT"
|
|
36
45
|
|
|
37
46
|
ENV_REFLECT_LLM_PROVIDER = "HINDSIGHT_API_REFLECT_LLM_PROVIDER"
|
|
38
47
|
ENV_REFLECT_LLM_API_KEY = "HINDSIGHT_API_REFLECT_LLM_API_KEY"
|
|
39
48
|
ENV_REFLECT_LLM_MODEL = "HINDSIGHT_API_REFLECT_LLM_MODEL"
|
|
40
49
|
ENV_REFLECT_LLM_BASE_URL = "HINDSIGHT_API_REFLECT_LLM_BASE_URL"
|
|
50
|
+
ENV_REFLECT_LLM_MAX_CONCURRENT = "HINDSIGHT_API_REFLECT_LLM_MAX_CONCURRENT"
|
|
51
|
+
ENV_REFLECT_LLM_MAX_RETRIES = "HINDSIGHT_API_REFLECT_LLM_MAX_RETRIES"
|
|
52
|
+
ENV_REFLECT_LLM_INITIAL_BACKOFF = "HINDSIGHT_API_REFLECT_LLM_INITIAL_BACKOFF"
|
|
53
|
+
ENV_REFLECT_LLM_MAX_BACKOFF = "HINDSIGHT_API_REFLECT_LLM_MAX_BACKOFF"
|
|
54
|
+
ENV_REFLECT_LLM_TIMEOUT = "HINDSIGHT_API_REFLECT_LLM_TIMEOUT"
|
|
41
55
|
|
|
42
56
|
ENV_CONSOLIDATION_LLM_PROVIDER = "HINDSIGHT_API_CONSOLIDATION_LLM_PROVIDER"
|
|
43
57
|
ENV_CONSOLIDATION_LLM_API_KEY = "HINDSIGHT_API_CONSOLIDATION_LLM_API_KEY"
|
|
44
58
|
ENV_CONSOLIDATION_LLM_MODEL = "HINDSIGHT_API_CONSOLIDATION_LLM_MODEL"
|
|
45
59
|
ENV_CONSOLIDATION_LLM_BASE_URL = "HINDSIGHT_API_CONSOLIDATION_LLM_BASE_URL"
|
|
60
|
+
ENV_CONSOLIDATION_LLM_MAX_CONCURRENT = "HINDSIGHT_API_CONSOLIDATION_LLM_MAX_CONCURRENT"
|
|
61
|
+
ENV_CONSOLIDATION_LLM_MAX_RETRIES = "HINDSIGHT_API_CONSOLIDATION_LLM_MAX_RETRIES"
|
|
62
|
+
ENV_CONSOLIDATION_LLM_INITIAL_BACKOFF = "HINDSIGHT_API_CONSOLIDATION_LLM_INITIAL_BACKOFF"
|
|
63
|
+
ENV_CONSOLIDATION_LLM_MAX_BACKOFF = "HINDSIGHT_API_CONSOLIDATION_LLM_MAX_BACKOFF"
|
|
64
|
+
ENV_CONSOLIDATION_LLM_TIMEOUT = "HINDSIGHT_API_CONSOLIDATION_LLM_TIMEOUT"
|
|
46
65
|
|
|
47
66
|
ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
|
|
48
67
|
ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
|
|
68
|
+
ENV_EMBEDDINGS_LOCAL_FORCE_CPU = "HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU"
|
|
49
69
|
ENV_EMBEDDINGS_TEI_URL = "HINDSIGHT_API_EMBEDDINGS_TEI_URL"
|
|
50
70
|
ENV_EMBEDDINGS_OPENAI_API_KEY = "HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY"
|
|
51
71
|
ENV_EMBEDDINGS_OPENAI_MODEL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL"
|
|
@@ -65,6 +85,7 @@ ENV_RERANKER_LITELLM_MODEL = "HINDSIGHT_API_RERANKER_LITELLM_MODEL"
|
|
|
65
85
|
|
|
66
86
|
ENV_RERANKER_PROVIDER = "HINDSIGHT_API_RERANKER_PROVIDER"
|
|
67
87
|
ENV_RERANKER_LOCAL_MODEL = "HINDSIGHT_API_RERANKER_LOCAL_MODEL"
|
|
88
|
+
ENV_RERANKER_LOCAL_FORCE_CPU = "HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"
|
|
68
89
|
ENV_RERANKER_LOCAL_MAX_CONCURRENT = "HINDSIGHT_API_RERANKER_LOCAL_MAX_CONCURRENT"
|
|
69
90
|
ENV_RERANKER_TEI_URL = "HINDSIGHT_API_RERANKER_TEI_URL"
|
|
70
91
|
ENV_RERANKER_TEI_BATCH_SIZE = "HINDSIGHT_API_RERANKER_TEI_BATCH_SIZE"
|
|
@@ -98,6 +119,7 @@ ENV_RETAIN_OBSERVATIONS_ASYNC = "HINDSIGHT_API_RETAIN_OBSERVATIONS_ASYNC"
|
|
|
98
119
|
# Observations settings (consolidated knowledge from facts)
|
|
99
120
|
ENV_ENABLE_OBSERVATIONS = "HINDSIGHT_API_ENABLE_OBSERVATIONS"
|
|
100
121
|
ENV_CONSOLIDATION_BATCH_SIZE = "HINDSIGHT_API_CONSOLIDATION_BATCH_SIZE"
|
|
122
|
+
ENV_CONSOLIDATION_MAX_TOKENS = "HINDSIGHT_API_CONSOLIDATION_MAX_TOKENS"
|
|
101
123
|
|
|
102
124
|
# Optimization flags
|
|
103
125
|
ENV_SKIP_LLM_VERIFICATION = "HINDSIGHT_API_SKIP_LLM_VERIFICATION"
|
|
@@ -125,18 +147,24 @@ ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"
|
|
|
125
147
|
|
|
126
148
|
# Default values
|
|
127
149
|
DEFAULT_DATABASE_URL = "pg0"
|
|
150
|
+
DEFAULT_DATABASE_SCHEMA = "public"
|
|
128
151
|
DEFAULT_LLM_PROVIDER = "openai"
|
|
129
152
|
DEFAULT_LLM_MODEL = "gpt-5-mini"
|
|
130
153
|
DEFAULT_LLM_MAX_CONCURRENT = 32
|
|
154
|
+
DEFAULT_LLM_MAX_RETRIES = 10 # Max retry attempts for LLM API calls
|
|
155
|
+
DEFAULT_LLM_INITIAL_BACKOFF = 1.0 # Initial backoff in seconds for retry exponential backoff
|
|
156
|
+
DEFAULT_LLM_MAX_BACKOFF = 60.0 # Max backoff cap in seconds for retry exponential backoff
|
|
131
157
|
DEFAULT_LLM_TIMEOUT = 120.0 # seconds
|
|
132
158
|
|
|
133
159
|
DEFAULT_EMBEDDINGS_PROVIDER = "local"
|
|
134
160
|
DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
|
|
161
|
+
DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU = False # Force CPU mode for local embeddings (avoids MPS/XPC issues on macOS)
|
|
135
162
|
DEFAULT_EMBEDDINGS_OPENAI_MODEL = "text-embedding-3-small"
|
|
136
163
|
DEFAULT_EMBEDDING_DIMENSION = 384
|
|
137
164
|
|
|
138
165
|
DEFAULT_RERANKER_PROVIDER = "local"
|
|
139
166
|
DEFAULT_RERANKER_LOCAL_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
|
|
167
|
+
DEFAULT_RERANKER_LOCAL_FORCE_CPU = False # Force CPU mode for local reranker (avoids MPS/XPC issues on macOS)
|
|
140
168
|
DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT = 4 # Limit concurrent CPU-bound reranking to prevent thrashing
|
|
141
169
|
DEFAULT_RERANKER_TEI_BATCH_SIZE = 128
|
|
142
170
|
DEFAULT_RERANKER_TEI_MAX_CONCURRENT = 8
|
|
@@ -177,6 +205,7 @@ DEFAULT_RETAIN_OBSERVATIONS_ASYNC = False # Run observation generation async (a
|
|
|
177
205
|
# Observations defaults (consolidated knowledge from facts)
|
|
178
206
|
DEFAULT_ENABLE_OBSERVATIONS = True # Observations enabled by default
|
|
179
207
|
DEFAULT_CONSOLIDATION_BATCH_SIZE = 50 # Memories to load per batch (internal memory optimization)
|
|
208
|
+
DEFAULT_CONSOLIDATION_MAX_TOKENS = 1024 # Max tokens for recall when finding related observations
|
|
180
209
|
|
|
181
210
|
# Database migrations
|
|
182
211
|
DEFAULT_RUN_MIGRATIONS_ON_STARTUP = True
|
|
@@ -270,6 +299,7 @@ class HindsightConfig:
|
|
|
270
299
|
|
|
271
300
|
# Database
|
|
272
301
|
database_url: str
|
|
302
|
+
database_schema: str
|
|
273
303
|
|
|
274
304
|
# LLM (default, used as fallback for per-operation config)
|
|
275
305
|
llm_provider: str
|
|
@@ -277,6 +307,9 @@ class HindsightConfig:
|
|
|
277
307
|
llm_model: str
|
|
278
308
|
llm_base_url: str | None
|
|
279
309
|
llm_max_concurrent: int
|
|
310
|
+
llm_max_retries: int
|
|
311
|
+
llm_initial_backoff: float
|
|
312
|
+
llm_max_backoff: float
|
|
280
313
|
llm_timeout: float
|
|
281
314
|
|
|
282
315
|
# Per-operation LLM configuration (None = use default LLM config)
|
|
@@ -284,20 +317,36 @@ class HindsightConfig:
|
|
|
284
317
|
retain_llm_api_key: str | None
|
|
285
318
|
retain_llm_model: str | None
|
|
286
319
|
retain_llm_base_url: str | None
|
|
320
|
+
retain_llm_max_concurrent: int | None
|
|
321
|
+
retain_llm_max_retries: int | None
|
|
322
|
+
retain_llm_initial_backoff: float | None
|
|
323
|
+
retain_llm_max_backoff: float | None
|
|
324
|
+
retain_llm_timeout: float | None
|
|
287
325
|
|
|
288
326
|
reflect_llm_provider: str | None
|
|
289
327
|
reflect_llm_api_key: str | None
|
|
290
328
|
reflect_llm_model: str | None
|
|
291
329
|
reflect_llm_base_url: str | None
|
|
330
|
+
reflect_llm_max_concurrent: int | None
|
|
331
|
+
reflect_llm_max_retries: int | None
|
|
332
|
+
reflect_llm_initial_backoff: float | None
|
|
333
|
+
reflect_llm_max_backoff: float | None
|
|
334
|
+
reflect_llm_timeout: float | None
|
|
292
335
|
|
|
293
336
|
consolidation_llm_provider: str | None
|
|
294
337
|
consolidation_llm_api_key: str | None
|
|
295
338
|
consolidation_llm_model: str | None
|
|
296
339
|
consolidation_llm_base_url: str | None
|
|
340
|
+
consolidation_llm_max_concurrent: int | None
|
|
341
|
+
consolidation_llm_max_retries: int | None
|
|
342
|
+
consolidation_llm_initial_backoff: float | None
|
|
343
|
+
consolidation_llm_max_backoff: float | None
|
|
344
|
+
consolidation_llm_timeout: float | None
|
|
297
345
|
|
|
298
346
|
# Embeddings
|
|
299
347
|
embeddings_provider: str
|
|
300
348
|
embeddings_local_model: str
|
|
349
|
+
embeddings_local_force_cpu: bool
|
|
301
350
|
embeddings_tei_url: str | None
|
|
302
351
|
embeddings_openai_base_url: str | None
|
|
303
352
|
embeddings_cohere_base_url: str | None
|
|
@@ -305,6 +354,8 @@ class HindsightConfig:
|
|
|
305
354
|
# Reranker
|
|
306
355
|
reranker_provider: str
|
|
307
356
|
reranker_local_model: str
|
|
357
|
+
reranker_local_force_cpu: bool
|
|
358
|
+
reranker_local_max_concurrent: int
|
|
308
359
|
reranker_tei_url: str | None
|
|
309
360
|
reranker_tei_batch_size: int
|
|
310
361
|
reranker_tei_max_concurrent: int
|
|
@@ -336,6 +387,7 @@ class HindsightConfig:
|
|
|
336
387
|
# Observations settings (consolidated knowledge from facts)
|
|
337
388
|
enable_observations: bool
|
|
338
389
|
consolidation_batch_size: int
|
|
390
|
+
consolidation_max_tokens: int
|
|
339
391
|
|
|
340
392
|
# Optimization flags
|
|
341
393
|
skip_llm_verification: bool
|
|
@@ -367,35 +419,93 @@ class HindsightConfig:
|
|
|
367
419
|
return cls(
|
|
368
420
|
# Database
|
|
369
421
|
database_url=os.getenv(ENV_DATABASE_URL, DEFAULT_DATABASE_URL),
|
|
422
|
+
database_schema=os.getenv(ENV_DATABASE_SCHEMA, DEFAULT_DATABASE_SCHEMA),
|
|
370
423
|
# LLM
|
|
371
424
|
llm_provider=os.getenv(ENV_LLM_PROVIDER, DEFAULT_LLM_PROVIDER),
|
|
372
425
|
llm_api_key=os.getenv(ENV_LLM_API_KEY),
|
|
373
426
|
llm_model=os.getenv(ENV_LLM_MODEL, DEFAULT_LLM_MODEL),
|
|
374
427
|
llm_base_url=os.getenv(ENV_LLM_BASE_URL) or None,
|
|
375
428
|
llm_max_concurrent=int(os.getenv(ENV_LLM_MAX_CONCURRENT, str(DEFAULT_LLM_MAX_CONCURRENT))),
|
|
429
|
+
llm_max_retries=int(os.getenv(ENV_LLM_MAX_RETRIES, str(DEFAULT_LLM_MAX_RETRIES))),
|
|
430
|
+
llm_initial_backoff=float(os.getenv(ENV_LLM_INITIAL_BACKOFF, str(DEFAULT_LLM_INITIAL_BACKOFF))),
|
|
431
|
+
llm_max_backoff=float(os.getenv(ENV_LLM_MAX_BACKOFF, str(DEFAULT_LLM_MAX_BACKOFF))),
|
|
376
432
|
llm_timeout=float(os.getenv(ENV_LLM_TIMEOUT, str(DEFAULT_LLM_TIMEOUT))),
|
|
377
433
|
# Per-operation LLM config (None = use default)
|
|
378
434
|
retain_llm_provider=os.getenv(ENV_RETAIN_LLM_PROVIDER) or None,
|
|
379
435
|
retain_llm_api_key=os.getenv(ENV_RETAIN_LLM_API_KEY) or None,
|
|
380
436
|
retain_llm_model=os.getenv(ENV_RETAIN_LLM_MODEL) or None,
|
|
381
437
|
retain_llm_base_url=os.getenv(ENV_RETAIN_LLM_BASE_URL) or None,
|
|
438
|
+
retain_llm_max_concurrent=int(os.getenv(ENV_RETAIN_LLM_MAX_CONCURRENT))
|
|
439
|
+
if os.getenv(ENV_RETAIN_LLM_MAX_CONCURRENT)
|
|
440
|
+
else None,
|
|
441
|
+
retain_llm_max_retries=int(os.getenv(ENV_RETAIN_LLM_MAX_RETRIES))
|
|
442
|
+
if os.getenv(ENV_RETAIN_LLM_MAX_RETRIES)
|
|
443
|
+
else None,
|
|
444
|
+
retain_llm_initial_backoff=float(os.getenv(ENV_RETAIN_LLM_INITIAL_BACKOFF))
|
|
445
|
+
if os.getenv(ENV_RETAIN_LLM_INITIAL_BACKOFF)
|
|
446
|
+
else None,
|
|
447
|
+
retain_llm_max_backoff=float(os.getenv(ENV_RETAIN_LLM_MAX_BACKOFF))
|
|
448
|
+
if os.getenv(ENV_RETAIN_LLM_MAX_BACKOFF)
|
|
449
|
+
else None,
|
|
450
|
+
retain_llm_timeout=float(os.getenv(ENV_RETAIN_LLM_TIMEOUT)) if os.getenv(ENV_RETAIN_LLM_TIMEOUT) else None,
|
|
382
451
|
reflect_llm_provider=os.getenv(ENV_REFLECT_LLM_PROVIDER) or None,
|
|
383
452
|
reflect_llm_api_key=os.getenv(ENV_REFLECT_LLM_API_KEY) or None,
|
|
384
453
|
reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL) or None,
|
|
385
454
|
reflect_llm_base_url=os.getenv(ENV_REFLECT_LLM_BASE_URL) or None,
|
|
455
|
+
reflect_llm_max_concurrent=int(os.getenv(ENV_REFLECT_LLM_MAX_CONCURRENT))
|
|
456
|
+
if os.getenv(ENV_REFLECT_LLM_MAX_CONCURRENT)
|
|
457
|
+
else None,
|
|
458
|
+
reflect_llm_max_retries=int(os.getenv(ENV_REFLECT_LLM_MAX_RETRIES))
|
|
459
|
+
if os.getenv(ENV_REFLECT_LLM_MAX_RETRIES)
|
|
460
|
+
else None,
|
|
461
|
+
reflect_llm_initial_backoff=float(os.getenv(ENV_REFLECT_LLM_INITIAL_BACKOFF))
|
|
462
|
+
if os.getenv(ENV_REFLECT_LLM_INITIAL_BACKOFF)
|
|
463
|
+
else None,
|
|
464
|
+
reflect_llm_max_backoff=float(os.getenv(ENV_REFLECT_LLM_MAX_BACKOFF))
|
|
465
|
+
if os.getenv(ENV_REFLECT_LLM_MAX_BACKOFF)
|
|
466
|
+
else None,
|
|
467
|
+
reflect_llm_timeout=float(os.getenv(ENV_REFLECT_LLM_TIMEOUT))
|
|
468
|
+
if os.getenv(ENV_REFLECT_LLM_TIMEOUT)
|
|
469
|
+
else None,
|
|
386
470
|
consolidation_llm_provider=os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER) or None,
|
|
387
471
|
consolidation_llm_api_key=os.getenv(ENV_CONSOLIDATION_LLM_API_KEY) or None,
|
|
388
472
|
consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL) or None,
|
|
389
473
|
consolidation_llm_base_url=os.getenv(ENV_CONSOLIDATION_LLM_BASE_URL) or None,
|
|
474
|
+
consolidation_llm_max_concurrent=int(os.getenv(ENV_CONSOLIDATION_LLM_MAX_CONCURRENT))
|
|
475
|
+
if os.getenv(ENV_CONSOLIDATION_LLM_MAX_CONCURRENT)
|
|
476
|
+
else None,
|
|
477
|
+
consolidation_llm_max_retries=int(os.getenv(ENV_CONSOLIDATION_LLM_MAX_RETRIES))
|
|
478
|
+
if os.getenv(ENV_CONSOLIDATION_LLM_MAX_RETRIES)
|
|
479
|
+
else None,
|
|
480
|
+
consolidation_llm_initial_backoff=float(os.getenv(ENV_CONSOLIDATION_LLM_INITIAL_BACKOFF))
|
|
481
|
+
if os.getenv(ENV_CONSOLIDATION_LLM_INITIAL_BACKOFF)
|
|
482
|
+
else None,
|
|
483
|
+
consolidation_llm_max_backoff=float(os.getenv(ENV_CONSOLIDATION_LLM_MAX_BACKOFF))
|
|
484
|
+
if os.getenv(ENV_CONSOLIDATION_LLM_MAX_BACKOFF)
|
|
485
|
+
else None,
|
|
486
|
+
consolidation_llm_timeout=float(os.getenv(ENV_CONSOLIDATION_LLM_TIMEOUT))
|
|
487
|
+
if os.getenv(ENV_CONSOLIDATION_LLM_TIMEOUT)
|
|
488
|
+
else None,
|
|
390
489
|
# Embeddings
|
|
391
490
|
embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
|
|
392
491
|
embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
|
|
492
|
+
embeddings_local_force_cpu=os.getenv(
|
|
493
|
+
ENV_EMBEDDINGS_LOCAL_FORCE_CPU, str(DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU)
|
|
494
|
+
).lower()
|
|
495
|
+
in ("true", "1"),
|
|
393
496
|
embeddings_tei_url=os.getenv(ENV_EMBEDDINGS_TEI_URL),
|
|
394
497
|
embeddings_openai_base_url=os.getenv(ENV_EMBEDDINGS_OPENAI_BASE_URL) or None,
|
|
395
498
|
embeddings_cohere_base_url=os.getenv(ENV_EMBEDDINGS_COHERE_BASE_URL) or None,
|
|
396
499
|
# Reranker
|
|
397
500
|
reranker_provider=os.getenv(ENV_RERANKER_PROVIDER, DEFAULT_RERANKER_PROVIDER),
|
|
398
501
|
reranker_local_model=os.getenv(ENV_RERANKER_LOCAL_MODEL, DEFAULT_RERANKER_LOCAL_MODEL),
|
|
502
|
+
reranker_local_force_cpu=os.getenv(
|
|
503
|
+
ENV_RERANKER_LOCAL_FORCE_CPU, str(DEFAULT_RERANKER_LOCAL_FORCE_CPU)
|
|
504
|
+
).lower()
|
|
505
|
+
in ("true", "1"),
|
|
506
|
+
reranker_local_max_concurrent=int(
|
|
507
|
+
os.getenv(ENV_RERANKER_LOCAL_MAX_CONCURRENT, str(DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT))
|
|
508
|
+
),
|
|
399
509
|
reranker_tei_url=os.getenv(ENV_RERANKER_TEI_URL),
|
|
400
510
|
reranker_tei_batch_size=int(os.getenv(ENV_RERANKER_TEI_BATCH_SIZE, str(DEFAULT_RERANKER_TEI_BATCH_SIZE))),
|
|
401
511
|
reranker_tei_max_concurrent=int(
|
|
@@ -444,6 +554,9 @@ class HindsightConfig:
|
|
|
444
554
|
consolidation_batch_size=int(
|
|
445
555
|
os.getenv(ENV_CONSOLIDATION_BATCH_SIZE, str(DEFAULT_CONSOLIDATION_BATCH_SIZE))
|
|
446
556
|
),
|
|
557
|
+
consolidation_max_tokens=int(
|
|
558
|
+
os.getenv(ENV_CONSOLIDATION_MAX_TOKENS, str(DEFAULT_CONSOLIDATION_MAX_TOKENS))
|
|
559
|
+
),
|
|
447
560
|
# Database migrations
|
|
448
561
|
run_migrations_on_startup=os.getenv(ENV_RUN_MIGRATIONS_ON_STARTUP, "true").lower() == "true",
|
|
449
562
|
# Database connection pool
|
|
@@ -515,7 +628,7 @@ class HindsightConfig:
|
|
|
515
628
|
|
|
516
629
|
def log_config(self) -> None:
|
|
517
630
|
"""Log the current configuration (without sensitive values)."""
|
|
518
|
-
logger.info(f"Database: {self.database_url}")
|
|
631
|
+
logger.info(f"Database: {self.database_url} (schema: {self.database_schema})")
|
|
519
632
|
logger.info(f"LLM: provider={self.llm_provider}, model={self.llm_model}")
|
|
520
633
|
if self.retain_llm_provider or self.retain_llm_model:
|
|
521
634
|
retain_provider = self.retain_llm_provider or self.llm_provider
|
|
@@ -52,7 +52,10 @@ class IdleTimeoutMiddleware:
|
|
|
52
52
|
logger.info(f"Idle timeout reached ({self.idle_timeout}s), shutting down daemon")
|
|
53
53
|
# Give a moment for any in-flight requests
|
|
54
54
|
await asyncio.sleep(1)
|
|
55
|
-
|
|
55
|
+
# Send SIGTERM to ourselves to trigger graceful shutdown
|
|
56
|
+
import signal
|
|
57
|
+
|
|
58
|
+
os.kill(os.getpid(), signal.SIGTERM)
|
|
56
59
|
|
|
57
60
|
|
|
58
61
|
class DaemonLock:
|
{hindsight_api-0.4.0 → hindsight_api-0.4.2}/hindsight_api/engine/consolidation/consolidator.py
RENAMED
|
@@ -144,10 +144,14 @@ async def run_consolidation_job(
|
|
|
144
144
|
}
|
|
145
145
|
|
|
146
146
|
batch_num = 0
|
|
147
|
+
last_progress_timings = {} # Track timings at last progress log
|
|
147
148
|
while True:
|
|
148
149
|
batch_num += 1
|
|
149
150
|
batch_start = time.time()
|
|
150
151
|
|
|
152
|
+
# Snapshot timings at batch start for per-batch calculation
|
|
153
|
+
batch_start_timings = perf.timings.copy()
|
|
154
|
+
|
|
151
155
|
# Fetch next batch of unconsolidated memories
|
|
152
156
|
async with pool.acquire() as conn:
|
|
153
157
|
t0 = time.time()
|
|
@@ -217,19 +221,44 @@ async def run_consolidation_job(
|
|
|
217
221
|
elif action == "skipped":
|
|
218
222
|
stats["skipped"] += 1
|
|
219
223
|
|
|
220
|
-
# Log progress periodically
|
|
224
|
+
# Log progress periodically with timing breakdown
|
|
221
225
|
if stats["memories_processed"] % 10 == 0:
|
|
226
|
+
# Calculate timing deltas since last progress log
|
|
227
|
+
timing_parts = []
|
|
228
|
+
for key in ["recall", "llm", "embedding", "db_write"]:
|
|
229
|
+
if key in perf.timings:
|
|
230
|
+
delta = perf.timings[key] - last_progress_timings.get(key, 0)
|
|
231
|
+
timing_parts.append(f"{key}={delta:.2f}s")
|
|
232
|
+
|
|
233
|
+
timing_str = f" | {', '.join(timing_parts)}" if timing_parts else ""
|
|
222
234
|
logger.info(
|
|
223
235
|
f"[CONSOLIDATION] bank={bank_id} progress: "
|
|
224
|
-
f"{stats['memories_processed']}/{total_count} memories processed"
|
|
236
|
+
f"{stats['memories_processed']}/{total_count} memories processed{timing_str}"
|
|
225
237
|
)
|
|
226
238
|
|
|
239
|
+
# Update last progress snapshot
|
|
240
|
+
last_progress_timings = perf.timings.copy()
|
|
241
|
+
|
|
227
242
|
batch_time = time.time() - batch_start
|
|
228
243
|
perf.log(
|
|
229
244
|
f"[2] Batch {batch_num}: {len(memories)} memories in {batch_time:.3f}s "
|
|
230
245
|
f"(avg {batch_time / len(memories):.3f}s/memory)"
|
|
231
246
|
)
|
|
232
247
|
|
|
248
|
+
# Log timing breakdown after each batch (delta from batch start)
|
|
249
|
+
timing_parts = []
|
|
250
|
+
for key in ["recall", "llm", "embedding", "db_write"]:
|
|
251
|
+
if key in perf.timings:
|
|
252
|
+
delta = perf.timings[key] - batch_start_timings.get(key, 0)
|
|
253
|
+
timing_parts.append(f"{key}={delta:.3f}s")
|
|
254
|
+
|
|
255
|
+
if timing_parts:
|
|
256
|
+
avg_per_memory = batch_time / len(memories) if memories else 0
|
|
257
|
+
logger.info(
|
|
258
|
+
f"[CONSOLIDATION] bank={bank_id} batch {batch_num}/{len(memories)} memories: "
|
|
259
|
+
f"{', '.join(timing_parts)} | avg={avg_per_memory:.3f}s/memory"
|
|
260
|
+
)
|
|
261
|
+
|
|
233
262
|
# Build summary
|
|
234
263
|
perf.log(
|
|
235
264
|
f"[3] Results: {stats['memories_processed']} memories -> "
|
|
@@ -639,28 +668,27 @@ async def _find_related_observations(
|
|
|
639
668
|
request_context: "RequestContext",
|
|
640
669
|
) -> list[dict[str, Any]]:
|
|
641
670
|
"""
|
|
642
|
-
Find observations related to the given query using
|
|
671
|
+
Find observations related to the given query using optimized recall.
|
|
643
672
|
|
|
644
673
|
IMPORTANT: We do NOT filter by tags here. Consolidation needs to see ALL
|
|
645
674
|
potentially related observations regardless of scope, so the LLM can
|
|
646
675
|
decide on tag routing (same scope update vs cross-scope create).
|
|
647
676
|
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
- BM25 text search (keyword matching)
|
|
651
|
-
- Entity-based retrieval (shared entities)
|
|
652
|
-
- Graph traversal (connected via entity links)
|
|
677
|
+
Uses max_tokens to naturally limit observations (no artificial count limit).
|
|
678
|
+
Includes source memories with dates for LLM context.
|
|
653
679
|
|
|
654
680
|
Returns:
|
|
655
|
-
List of related observations with their tags
|
|
681
|
+
List of related observations with their tags, source memories, and dates
|
|
656
682
|
"""
|
|
657
|
-
# Use recall to find related observations
|
|
658
|
-
#
|
|
659
|
-
|
|
683
|
+
# Use recall to find related observations with token budget
|
|
684
|
+
# max_tokens naturally limits how many observations are returned
|
|
685
|
+
from ...config import get_config
|
|
686
|
+
|
|
687
|
+
config = get_config()
|
|
660
688
|
recall_result = await memory_engine.recall_async(
|
|
661
689
|
bank_id=bank_id,
|
|
662
690
|
query=query,
|
|
663
|
-
max_tokens=
|
|
691
|
+
max_tokens=config.consolidation_max_tokens, # Token budget for observations (configurable)
|
|
664
692
|
fact_type=["observation"], # Only retrieve observations
|
|
665
693
|
request_context=request_context,
|
|
666
694
|
_quiet=True, # Suppress logging
|
|
@@ -668,43 +696,82 @@ async def _find_related_observations(
|
|
|
668
696
|
)
|
|
669
697
|
|
|
670
698
|
# If no observations returned, return empty list
|
|
671
|
-
# When fact_type=["observation"], results come back in `results` field
|
|
672
699
|
if not recall_result.results:
|
|
673
700
|
return []
|
|
674
701
|
|
|
675
|
-
#
|
|
702
|
+
# Batch fetch all observations in a single query (no artificial limit)
|
|
703
|
+
observation_ids = [uuid.UUID(obs.id) for obs in recall_result.results]
|
|
704
|
+
|
|
705
|
+
rows = await conn.fetch(
|
|
706
|
+
f"""
|
|
707
|
+
SELECT id, text, proof_count, history, tags, source_memory_ids, created_at, updated_at,
|
|
708
|
+
occurred_start, occurred_end, mentioned_at
|
|
709
|
+
FROM {fq_table("memory_units")}
|
|
710
|
+
WHERE id = ANY($1) AND bank_id = $2 AND fact_type = 'observation'
|
|
711
|
+
""",
|
|
712
|
+
observation_ids,
|
|
713
|
+
bank_id,
|
|
714
|
+
)
|
|
715
|
+
|
|
716
|
+
# Build results list preserving recall order
|
|
717
|
+
id_to_row = {row["id"]: row for row in rows}
|
|
676
718
|
results = []
|
|
677
|
-
for obs in recall_result.results:
|
|
678
|
-
# Fetch full observation data from DB to get history, source_memory_ids, tags
|
|
679
|
-
row = await conn.fetchrow(
|
|
680
|
-
f"""
|
|
681
|
-
SELECT id, text, proof_count, history, tags, source_memory_ids, created_at, updated_at
|
|
682
|
-
FROM {fq_table("memory_units")}
|
|
683
|
-
WHERE id = $1 AND bank_id = $2 AND fact_type = 'observation'
|
|
684
|
-
""",
|
|
685
|
-
uuid.UUID(obs.id),
|
|
686
|
-
bank_id,
|
|
687
|
-
)
|
|
688
719
|
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
720
|
+
for obs in recall_result.results:
|
|
721
|
+
obs_id = uuid.UUID(obs.id)
|
|
722
|
+
if obs_id not in id_to_row:
|
|
723
|
+
continue
|
|
724
|
+
|
|
725
|
+
row = id_to_row[obs_id]
|
|
726
|
+
history = row["history"]
|
|
727
|
+
if isinstance(history, str):
|
|
728
|
+
history = json.loads(history)
|
|
729
|
+
elif history is None:
|
|
730
|
+
history = []
|
|
731
|
+
|
|
732
|
+
# Fetch source memories to include their text and dates
|
|
733
|
+
source_memory_ids = row["source_memory_ids"] or []
|
|
734
|
+
source_memories = []
|
|
735
|
+
|
|
736
|
+
if source_memory_ids:
|
|
737
|
+
source_rows = await conn.fetch(
|
|
738
|
+
f"""
|
|
739
|
+
SELECT text, occurred_start, occurred_end, mentioned_at, event_date
|
|
740
|
+
FROM {fq_table("memory_units")}
|
|
741
|
+
WHERE id = ANY($1) AND bank_id = $2
|
|
742
|
+
ORDER BY created_at ASC
|
|
743
|
+
LIMIT 5
|
|
744
|
+
""",
|
|
745
|
+
source_memory_ids[:5], # Limit to first 5 source memories for token efficiency
|
|
746
|
+
bank_id,
|
|
706
747
|
)
|
|
707
748
|
|
|
749
|
+
for src_row in source_rows:
|
|
750
|
+
source_memories.append(
|
|
751
|
+
{
|
|
752
|
+
"text": src_row["text"],
|
|
753
|
+
"occurred_start": src_row["occurred_start"],
|
|
754
|
+
"occurred_end": src_row["occurred_end"],
|
|
755
|
+
"mentioned_at": src_row["mentioned_at"],
|
|
756
|
+
"event_date": src_row["event_date"],
|
|
757
|
+
}
|
|
758
|
+
)
|
|
759
|
+
|
|
760
|
+
results.append(
|
|
761
|
+
{
|
|
762
|
+
"id": row["id"],
|
|
763
|
+
"text": row["text"],
|
|
764
|
+
"proof_count": row["proof_count"] or 1,
|
|
765
|
+
"tags": row["tags"] or [],
|
|
766
|
+
"source_memories": source_memories,
|
|
767
|
+
"occurred_start": row["occurred_start"],
|
|
768
|
+
"occurred_end": row["occurred_end"],
|
|
769
|
+
"mentioned_at": row["mentioned_at"],
|
|
770
|
+
"created_at": row["created_at"],
|
|
771
|
+
"updated_at": row["updated_at"],
|
|
772
|
+
}
|
|
773
|
+
)
|
|
774
|
+
|
|
708
775
|
return results
|
|
709
776
|
|
|
710
777
|
|
|
@@ -732,14 +799,43 @@ async def _consolidate_with_llm(
|
|
|
732
799
|
- {"action": "create", "text": "...", "reason": "..."}
|
|
733
800
|
- [] if fact is purely ephemeral (no durable knowledge)
|
|
734
801
|
"""
|
|
735
|
-
# Format observations
|
|
802
|
+
# Format observations as JSON with source memories and dates
|
|
736
803
|
if observations:
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
804
|
+
obs_list = []
|
|
805
|
+
for obs in observations:
|
|
806
|
+
obs_data = {
|
|
807
|
+
"id": str(obs["id"]),
|
|
808
|
+
"text": obs["text"],
|
|
809
|
+
"proof_count": obs["proof_count"],
|
|
810
|
+
"tags": obs["tags"],
|
|
811
|
+
"created_at": obs["created_at"].isoformat() if obs.get("created_at") else None,
|
|
812
|
+
"updated_at": obs["updated_at"].isoformat() if obs.get("updated_at") else None,
|
|
813
|
+
}
|
|
814
|
+
|
|
815
|
+
# Include temporal info if available
|
|
816
|
+
if obs.get("occurred_start"):
|
|
817
|
+
obs_data["occurred_start"] = obs["occurred_start"].isoformat()
|
|
818
|
+
if obs.get("occurred_end"):
|
|
819
|
+
obs_data["occurred_end"] = obs["occurred_end"].isoformat()
|
|
820
|
+
if obs.get("mentioned_at"):
|
|
821
|
+
obs_data["mentioned_at"] = obs["mentioned_at"].isoformat()
|
|
822
|
+
|
|
823
|
+
# Include source memories (up to 3 for brevity)
|
|
824
|
+
if obs.get("source_memories"):
|
|
825
|
+
obs_data["source_memories"] = [
|
|
826
|
+
{
|
|
827
|
+
"text": sm["text"],
|
|
828
|
+
"event_date": sm["event_date"].isoformat() if sm.get("event_date") else None,
|
|
829
|
+
"occurred_start": sm["occurred_start"].isoformat() if sm.get("occurred_start") else None,
|
|
830
|
+
}
|
|
831
|
+
for sm in obs["source_memories"][:3] # Limit to 3 for token efficiency
|
|
832
|
+
]
|
|
833
|
+
|
|
834
|
+
obs_list.append(obs_data)
|
|
835
|
+
|
|
836
|
+
observations_text = json.dumps(obs_list, indent=2)
|
|
741
837
|
else:
|
|
742
|
-
observations_text = "
|
|
838
|
+
observations_text = "[]"
|
|
743
839
|
|
|
744
840
|
# Only include mission section if mission is set and not the default
|
|
745
841
|
mission_section = ""
|
|
@@ -47,23 +47,31 @@ CONSOLIDATION_USER_PROMPT = """Analyze this new fact and consolidate into knowle
|
|
|
47
47
|
{mission_section}
|
|
48
48
|
NEW FACT: {fact_text}
|
|
49
49
|
|
|
50
|
-
EXISTING OBSERVATIONS:
|
|
50
|
+
EXISTING OBSERVATIONS (JSON array with source memories and dates):
|
|
51
51
|
{observations_text}
|
|
52
52
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
53
|
+
Each observation includes:
|
|
54
|
+
- id: unique identifier for updating
|
|
55
|
+
- text: the observation content
|
|
56
|
+
- proof_count: number of supporting memories
|
|
57
|
+
- tags: visibility scope (handled automatically)
|
|
58
|
+
- created_at/updated_at: when observation was created/modified
|
|
59
|
+
- occurred_start/occurred_end: temporal range of source facts
|
|
60
|
+
- source_memories: array of supporting facts with their text and dates
|
|
58
61
|
|
|
59
|
-
|
|
62
|
+
Instructions:
|
|
63
|
+
1. Extract DURABLE KNOWLEDGE from the new fact (not ephemeral state)
|
|
64
|
+
2. Review source_memories in existing observations to understand evidence
|
|
65
|
+
3. Check dates to detect contradictions or updates
|
|
66
|
+
4. Compare with observations:
|
|
67
|
+
- Same topic → UPDATE with learning_id
|
|
68
|
+
- New topic → CREATE new observation
|
|
69
|
+
- Purely ephemeral → return []
|
|
70
|
+
|
|
71
|
+
Output JSON array of actions:
|
|
60
72
|
[
|
|
61
|
-
{{"action": "update", "learning_id": "uuid", "text": "updated
|
|
73
|
+
{{"action": "update", "learning_id": "uuid-from-observations", "text": "updated knowledge", "reason": "..."}},
|
|
62
74
|
{{"action": "create", "text": "new durable knowledge", "reason": "..."}}
|
|
63
75
|
]
|
|
64
76
|
|
|
65
|
-
|
|
66
|
-
[]
|
|
67
|
-
|
|
68
|
-
If no observations exist and fact contains durable knowledge:
|
|
69
|
-
[{{"action": "create", "text": "durable knowledge text", "reason": "new topic"}}]"""
|
|
77
|
+
Return [] if fact contains no durable knowledge."""
|