hindsight-api 0.4.1__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/PKG-INFO +1 -1
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/__init__.py +1 -1
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/config.py +85 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/daemon.py +4 -1
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/consolidation/consolidator.py +31 -2
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/cross_encoder.py +5 -97
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/embeddings.py +2 -92
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/fact_extraction.py +16 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/main.py +21 -9
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/pyproject.toml +1 -1
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/.gitignore +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/README.md +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/admin/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/admin/cli.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/README +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/env.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/script.py.mako +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/rename_personality_to_disposition.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/api/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/api/http.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/api/mcp.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/banner.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/consolidation/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/consolidation/prompts.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/db_budget.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/db_utils.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/directives/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/directives/models.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/entity_resolver.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/interface.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/llm_wrapper.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/memory_engine.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/mental_models/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/mental_models/models.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/query_analyzer.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/agent.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/models.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/observations.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/prompts.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/tools.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/reflect/tools_schema.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/response_models.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/bank_utils.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/chunk_storage.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/deduplication.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/embedding_processing.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/embedding_utils.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/entity_processing.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/fact_storage.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/link_creation.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/link_utils.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/orchestrator.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/types.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/fusion.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/graph_retrieval.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/link_expansion_retrieval.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/mpfp_retrieval.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/reranking.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/retrieval.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/tags.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/temporal_extraction.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/think_utils.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/trace.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/tracer.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/search/types.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/task_backend.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/utils.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/extensions/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/extensions/base.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/extensions/builtin/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/extensions/builtin/tenant.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/extensions/context.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/extensions/http.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/extensions/loader.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/extensions/operation_validator.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/extensions/tenant.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/mcp_local.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/mcp_tools.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/metrics.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/migrations.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/models.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/pg0.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/server.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/worker/__init__.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/worker/main.py +0 -0
- {hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/worker/poller.py +0 -0
{hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/config.py

```diff
@@ -26,6 +26,9 @@ ENV_LLM_API_KEY = "HINDSIGHT_API_LLM_API_KEY"
 ENV_LLM_MODEL = "HINDSIGHT_API_LLM_MODEL"
 ENV_LLM_BASE_URL = "HINDSIGHT_API_LLM_BASE_URL"
 ENV_LLM_MAX_CONCURRENT = "HINDSIGHT_API_LLM_MAX_CONCURRENT"
+ENV_LLM_MAX_RETRIES = "HINDSIGHT_API_LLM_MAX_RETRIES"
+ENV_LLM_INITIAL_BACKOFF = "HINDSIGHT_API_LLM_INITIAL_BACKOFF"
+ENV_LLM_MAX_BACKOFF = "HINDSIGHT_API_LLM_MAX_BACKOFF"
 ENV_LLM_TIMEOUT = "HINDSIGHT_API_LLM_TIMEOUT"
 ENV_LLM_GROQ_SERVICE_TIER = "HINDSIGHT_API_LLM_GROQ_SERVICE_TIER"
 
```
```diff
@@ -34,16 +37,31 @@ ENV_RETAIN_LLM_PROVIDER = "HINDSIGHT_API_RETAIN_LLM_PROVIDER"
 ENV_RETAIN_LLM_API_KEY = "HINDSIGHT_API_RETAIN_LLM_API_KEY"
 ENV_RETAIN_LLM_MODEL = "HINDSIGHT_API_RETAIN_LLM_MODEL"
 ENV_RETAIN_LLM_BASE_URL = "HINDSIGHT_API_RETAIN_LLM_BASE_URL"
+ENV_RETAIN_LLM_MAX_CONCURRENT = "HINDSIGHT_API_RETAIN_LLM_MAX_CONCURRENT"
+ENV_RETAIN_LLM_MAX_RETRIES = "HINDSIGHT_API_RETAIN_LLM_MAX_RETRIES"
+ENV_RETAIN_LLM_INITIAL_BACKOFF = "HINDSIGHT_API_RETAIN_LLM_INITIAL_BACKOFF"
+ENV_RETAIN_LLM_MAX_BACKOFF = "HINDSIGHT_API_RETAIN_LLM_MAX_BACKOFF"
+ENV_RETAIN_LLM_TIMEOUT = "HINDSIGHT_API_RETAIN_LLM_TIMEOUT"
 
 ENV_REFLECT_LLM_PROVIDER = "HINDSIGHT_API_REFLECT_LLM_PROVIDER"
 ENV_REFLECT_LLM_API_KEY = "HINDSIGHT_API_REFLECT_LLM_API_KEY"
 ENV_REFLECT_LLM_MODEL = "HINDSIGHT_API_REFLECT_LLM_MODEL"
 ENV_REFLECT_LLM_BASE_URL = "HINDSIGHT_API_REFLECT_LLM_BASE_URL"
+ENV_REFLECT_LLM_MAX_CONCURRENT = "HINDSIGHT_API_REFLECT_LLM_MAX_CONCURRENT"
+ENV_REFLECT_LLM_MAX_RETRIES = "HINDSIGHT_API_REFLECT_LLM_MAX_RETRIES"
+ENV_REFLECT_LLM_INITIAL_BACKOFF = "HINDSIGHT_API_REFLECT_LLM_INITIAL_BACKOFF"
+ENV_REFLECT_LLM_MAX_BACKOFF = "HINDSIGHT_API_REFLECT_LLM_MAX_BACKOFF"
+ENV_REFLECT_LLM_TIMEOUT = "HINDSIGHT_API_REFLECT_LLM_TIMEOUT"
 
 ENV_CONSOLIDATION_LLM_PROVIDER = "HINDSIGHT_API_CONSOLIDATION_LLM_PROVIDER"
 ENV_CONSOLIDATION_LLM_API_KEY = "HINDSIGHT_API_CONSOLIDATION_LLM_API_KEY"
 ENV_CONSOLIDATION_LLM_MODEL = "HINDSIGHT_API_CONSOLIDATION_LLM_MODEL"
 ENV_CONSOLIDATION_LLM_BASE_URL = "HINDSIGHT_API_CONSOLIDATION_LLM_BASE_URL"
+ENV_CONSOLIDATION_LLM_MAX_CONCURRENT = "HINDSIGHT_API_CONSOLIDATION_LLM_MAX_CONCURRENT"
+ENV_CONSOLIDATION_LLM_MAX_RETRIES = "HINDSIGHT_API_CONSOLIDATION_LLM_MAX_RETRIES"
+ENV_CONSOLIDATION_LLM_INITIAL_BACKOFF = "HINDSIGHT_API_CONSOLIDATION_LLM_INITIAL_BACKOFF"
+ENV_CONSOLIDATION_LLM_MAX_BACKOFF = "HINDSIGHT_API_CONSOLIDATION_LLM_MAX_BACKOFF"
+ENV_CONSOLIDATION_LLM_TIMEOUT = "HINDSIGHT_API_CONSOLIDATION_LLM_TIMEOUT"
 
 ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
 ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
```
```diff
@@ -133,6 +151,9 @@ DEFAULT_DATABASE_SCHEMA = "public"
 DEFAULT_LLM_PROVIDER = "openai"
 DEFAULT_LLM_MODEL = "gpt-5-mini"
 DEFAULT_LLM_MAX_CONCURRENT = 32
+DEFAULT_LLM_MAX_RETRIES = 10  # Max retry attempts for LLM API calls
+DEFAULT_LLM_INITIAL_BACKOFF = 1.0  # Initial backoff in seconds for retry exponential backoff
+DEFAULT_LLM_MAX_BACKOFF = 60.0  # Max backoff cap in seconds for retry exponential backoff
 DEFAULT_LLM_TIMEOUT = 120.0  # seconds
 
 DEFAULT_EMBEDDINGS_PROVIDER = "local"
```
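The three new knobs describe a capped exponential backoff. The retry loop itself lives in the LLM wrapper, which this diff does not show; a minimal sketch of the delay schedule these defaults imply, assuming the common convention of doubling per attempt:

```python
# Sketch only: delays implied by DEFAULT_LLM_INITIAL_BACKOFF = 1.0,
# DEFAULT_LLM_MAX_BACKOFF = 60.0, DEFAULT_LLM_MAX_RETRIES = 10, assuming
# the wrapper doubles the delay per attempt (not shown in this diff).
def backoff_schedule(max_retries: int = 10, initial: float = 1.0, cap: float = 60.0) -> list[float]:
    return [min(initial * 2**attempt, cap) for attempt in range(max_retries)]

print(backoff_schedule())
# [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 60.0, 60.0, 60.0, 60.0]
```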
```diff
@@ -286,6 +307,9 @@ class HindsightConfig:
     llm_model: str
     llm_base_url: str | None
     llm_max_concurrent: int
+    llm_max_retries: int
+    llm_initial_backoff: float
+    llm_max_backoff: float
     llm_timeout: float
 
     # Per-operation LLM configuration (None = use default LLM config)
```
```diff
@@ -293,16 +317,31 @@ class HindsightConfig:
     retain_llm_api_key: str | None
     retain_llm_model: str | None
     retain_llm_base_url: str | None
+    retain_llm_max_concurrent: int | None
+    retain_llm_max_retries: int | None
+    retain_llm_initial_backoff: float | None
+    retain_llm_max_backoff: float | None
+    retain_llm_timeout: float | None
 
     reflect_llm_provider: str | None
     reflect_llm_api_key: str | None
     reflect_llm_model: str | None
     reflect_llm_base_url: str | None
+    reflect_llm_max_concurrent: int | None
+    reflect_llm_max_retries: int | None
+    reflect_llm_initial_backoff: float | None
+    reflect_llm_max_backoff: float | None
+    reflect_llm_timeout: float | None
 
     consolidation_llm_provider: str | None
     consolidation_llm_api_key: str | None
     consolidation_llm_model: str | None
     consolidation_llm_base_url: str | None
+    consolidation_llm_max_concurrent: int | None
+    consolidation_llm_max_retries: int | None
+    consolidation_llm_initial_backoff: float | None
+    consolidation_llm_max_backoff: float | None
+    consolidation_llm_timeout: float | None
 
     # Embeddings
     embeddings_provider: str
```
```diff
@@ -387,20 +426,66 @@ class HindsightConfig:
             llm_model=os.getenv(ENV_LLM_MODEL, DEFAULT_LLM_MODEL),
             llm_base_url=os.getenv(ENV_LLM_BASE_URL) or None,
             llm_max_concurrent=int(os.getenv(ENV_LLM_MAX_CONCURRENT, str(DEFAULT_LLM_MAX_CONCURRENT))),
+            llm_max_retries=int(os.getenv(ENV_LLM_MAX_RETRIES, str(DEFAULT_LLM_MAX_RETRIES))),
+            llm_initial_backoff=float(os.getenv(ENV_LLM_INITIAL_BACKOFF, str(DEFAULT_LLM_INITIAL_BACKOFF))),
+            llm_max_backoff=float(os.getenv(ENV_LLM_MAX_BACKOFF, str(DEFAULT_LLM_MAX_BACKOFF))),
             llm_timeout=float(os.getenv(ENV_LLM_TIMEOUT, str(DEFAULT_LLM_TIMEOUT))),
             # Per-operation LLM config (None = use default)
             retain_llm_provider=os.getenv(ENV_RETAIN_LLM_PROVIDER) or None,
             retain_llm_api_key=os.getenv(ENV_RETAIN_LLM_API_KEY) or None,
             retain_llm_model=os.getenv(ENV_RETAIN_LLM_MODEL) or None,
             retain_llm_base_url=os.getenv(ENV_RETAIN_LLM_BASE_URL) or None,
+            retain_llm_max_concurrent=int(os.getenv(ENV_RETAIN_LLM_MAX_CONCURRENT))
+            if os.getenv(ENV_RETAIN_LLM_MAX_CONCURRENT)
+            else None,
+            retain_llm_max_retries=int(os.getenv(ENV_RETAIN_LLM_MAX_RETRIES))
+            if os.getenv(ENV_RETAIN_LLM_MAX_RETRIES)
+            else None,
+            retain_llm_initial_backoff=float(os.getenv(ENV_RETAIN_LLM_INITIAL_BACKOFF))
+            if os.getenv(ENV_RETAIN_LLM_INITIAL_BACKOFF)
+            else None,
+            retain_llm_max_backoff=float(os.getenv(ENV_RETAIN_LLM_MAX_BACKOFF))
+            if os.getenv(ENV_RETAIN_LLM_MAX_BACKOFF)
+            else None,
+            retain_llm_timeout=float(os.getenv(ENV_RETAIN_LLM_TIMEOUT)) if os.getenv(ENV_RETAIN_LLM_TIMEOUT) else None,
             reflect_llm_provider=os.getenv(ENV_REFLECT_LLM_PROVIDER) or None,
             reflect_llm_api_key=os.getenv(ENV_REFLECT_LLM_API_KEY) or None,
             reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL) or None,
             reflect_llm_base_url=os.getenv(ENV_REFLECT_LLM_BASE_URL) or None,
+            reflect_llm_max_concurrent=int(os.getenv(ENV_REFLECT_LLM_MAX_CONCURRENT))
+            if os.getenv(ENV_REFLECT_LLM_MAX_CONCURRENT)
+            else None,
+            reflect_llm_max_retries=int(os.getenv(ENV_REFLECT_LLM_MAX_RETRIES))
+            if os.getenv(ENV_REFLECT_LLM_MAX_RETRIES)
+            else None,
+            reflect_llm_initial_backoff=float(os.getenv(ENV_REFLECT_LLM_INITIAL_BACKOFF))
+            if os.getenv(ENV_REFLECT_LLM_INITIAL_BACKOFF)
+            else None,
+            reflect_llm_max_backoff=float(os.getenv(ENV_REFLECT_LLM_MAX_BACKOFF))
+            if os.getenv(ENV_REFLECT_LLM_MAX_BACKOFF)
+            else None,
+            reflect_llm_timeout=float(os.getenv(ENV_REFLECT_LLM_TIMEOUT))
+            if os.getenv(ENV_REFLECT_LLM_TIMEOUT)
+            else None,
             consolidation_llm_provider=os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER) or None,
             consolidation_llm_api_key=os.getenv(ENV_CONSOLIDATION_LLM_API_KEY) or None,
             consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL) or None,
             consolidation_llm_base_url=os.getenv(ENV_CONSOLIDATION_LLM_BASE_URL) or None,
+            consolidation_llm_max_concurrent=int(os.getenv(ENV_CONSOLIDATION_LLM_MAX_CONCURRENT))
+            if os.getenv(ENV_CONSOLIDATION_LLM_MAX_CONCURRENT)
+            else None,
+            consolidation_llm_max_retries=int(os.getenv(ENV_CONSOLIDATION_LLM_MAX_RETRIES))
+            if os.getenv(ENV_CONSOLIDATION_LLM_MAX_RETRIES)
+            else None,
+            consolidation_llm_initial_backoff=float(os.getenv(ENV_CONSOLIDATION_LLM_INITIAL_BACKOFF))
+            if os.getenv(ENV_CONSOLIDATION_LLM_INITIAL_BACKOFF)
+            else None,
+            consolidation_llm_max_backoff=float(os.getenv(ENV_CONSOLIDATION_LLM_MAX_BACKOFF))
+            if os.getenv(ENV_CONSOLIDATION_LLM_MAX_BACKOFF)
+            else None,
+            consolidation_llm_timeout=float(os.getenv(ENV_CONSOLIDATION_LLM_TIMEOUT))
+            if os.getenv(ENV_CONSOLIDATION_LLM_TIMEOUT)
+            else None,
             # Embeddings
             embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
             embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
```
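Each optional override repeats the same `int(os.getenv(X)) if os.getenv(X) else None` shape. A hypothetical helper (not part of the package) that captures the pattern:

```python
import os
from typing import Callable, TypeVar

T = TypeVar("T")

# Hypothetical helper illustrating the pattern above: parse an env var with a
# given converter, returning None when the variable is unset or empty.
def optional_env(name: str, convert: Callable[[str], T]) -> T | None:
    raw = os.getenv(name)
    return convert(raw) if raw else None

# Usage mirroring the diff:
retain_llm_max_retries = optional_env("HINDSIGHT_API_RETAIN_LLM_MAX_RETRIES", int)
retain_llm_max_backoff = optional_env("HINDSIGHT_API_RETAIN_LLM_MAX_BACKOFF", float)
```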
{hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/daemon.py

```diff
@@ -52,7 +52,10 @@ class IdleTimeoutMiddleware:
                 logger.info(f"Idle timeout reached ({self.idle_timeout}s), shutting down daemon")
                 # Give a moment for any in-flight requests
                 await asyncio.sleep(1)
-
+                # Send SIGTERM to ourselves to trigger graceful shutdown
+                import signal
+
+                os.kill(os.getpid(), signal.SIGTERM)
 
 
 class DaemonLock:
```
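Sending SIGTERM to your own PID hands shutdown to whatever handler the hosting server installed (uvicorn, for instance, traps SIGTERM and drains in-flight requests) instead of exiting abruptly. A standalone POSIX sketch of the mechanism; the handler here is a stand-in for the ASGI server's own:

```python
import os
import signal

def handle_sigterm(signum, frame):
    # Stand-in for the ASGI server's handler, which would stop accepting
    # connections and finish in-flight requests before exiting.
    print("SIGTERM received, shutting down gracefully")
    raise SystemExit(0)

signal.signal(signal.SIGTERM, handle_sigterm)
os.kill(os.getpid(), signal.SIGTERM)  # the same self-signal the diff adds
```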
{hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/consolidation/consolidator.py

```diff
@@ -144,10 +144,14 @@ async def run_consolidation_job(
     }
 
     batch_num = 0
+    last_progress_timings = {}  # Track timings at last progress log
     while True:
         batch_num += 1
         batch_start = time.time()
 
+        # Snapshot timings at batch start for per-batch calculation
+        batch_start_timings = perf.timings.copy()
+
         # Fetch next batch of unconsolidated memories
         async with pool.acquire() as conn:
             t0 = time.time()
@@ -217,19 +221,44 @@ async def run_consolidation_job(
             elif action == "skipped":
                 stats["skipped"] += 1
 
-            # Log progress periodically
+            # Log progress periodically with timing breakdown
             if stats["memories_processed"] % 10 == 0:
+                # Calculate timing deltas since last progress log
+                timing_parts = []
+                for key in ["recall", "llm", "embedding", "db_write"]:
+                    if key in perf.timings:
+                        delta = perf.timings[key] - last_progress_timings.get(key, 0)
+                        timing_parts.append(f"{key}={delta:.2f}s")
+
+                timing_str = f" | {', '.join(timing_parts)}" if timing_parts else ""
                 logger.info(
                     f"[CONSOLIDATION] bank={bank_id} progress: "
-                    f"{stats['memories_processed']}/{total_count} memories processed"
+                    f"{stats['memories_processed']}/{total_count} memories processed{timing_str}"
                 )
 
+                # Update last progress snapshot
+                last_progress_timings = perf.timings.copy()
+
         batch_time = time.time() - batch_start
         perf.log(
             f"[2] Batch {batch_num}: {len(memories)} memories in {batch_time:.3f}s "
             f"(avg {batch_time / len(memories):.3f}s/memory)"
         )
 
+        # Log timing breakdown after each batch (delta from batch start)
+        timing_parts = []
+        for key in ["recall", "llm", "embedding", "db_write"]:
+            if key in perf.timings:
+                delta = perf.timings[key] - batch_start_timings.get(key, 0)
+                timing_parts.append(f"{key}={delta:.3f}s")
+
+        if timing_parts:
+            avg_per_memory = batch_time / len(memories) if memories else 0
+            logger.info(
+                f"[CONSOLIDATION] bank={bank_id} batch {batch_num}/{len(memories)} memories: "
+                f"{', '.join(timing_parts)} | avg={avg_per_memory:.3f}s/memory"
+            )
+
         # Build summary
         perf.log(
             f"[3] Results: {stats['memories_processed']} memories -> "
```
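The progress logging relies on `perf.timings` being a dict of cumulative seconds per phase; per-interval numbers come from snapshotting the dict and subtracting. The same pattern in isolation (the `perf` object itself is not shown in this diff, so the accumulator below is an assumption about its shape):

```python
import time

timings: dict[str, float] = {}  # cumulative seconds per phase, like perf.timings

def track(key: str, start: float) -> None:
    timings[key] = timings.get(key, 0.0) + (time.time() - start)

snapshot = timings.copy()  # taken at batch start / at the last progress log

t0 = time.time()
time.sleep(0.05)  # stand-in for a recall/llm/db phase
track("llm", t0)

# Delta since the snapshot, exactly as the progress log computes it
delta = timings["llm"] - snapshot.get("llm", 0)
print(f"llm={delta:.2f}s")
```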
{hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/cross_encoder.py

```diff
@@ -178,108 +178,16 @@ class LocalSTCrossEncoder(CrossEncoderModel):
         else:
             logger.info("Reranker: local provider initialized (using existing executor)")
 
-    def _is_xpc_error(self, error: Exception) -> bool:
-        """
-        Check if an error is an XPC connection error (macOS daemon issue).
-
-        On macOS, long-running daemons can lose XPC connections to system services
-        when the process is idle for extended periods.
-        """
-        error_str = str(error).lower()
-        return "xpc_error_connection_invalid" in error_str or "xpc error" in error_str
-
-    def _reinitialize_model_sync(self) -> None:
-        """
-        Clear and reinitialize the cross-encoder model synchronously.
-
-        This is used to recover from XPC errors on macOS where the
-        PyTorch/MPS backend loses its connection to system services.
-        """
-        logger.warning(f"Reinitializing reranker model {self.model_name} due to backend error")
-
-        # Clear existing model
-        self._model = None
-
-        # Force garbage collection to free resources
-        import gc
-
-        import torch
-
-        gc.collect()
-
-        # If using CUDA/MPS, clear the cache
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-            try:
-                torch.mps.empty_cache()
-            except AttributeError:
-                pass  # Method might not exist in all PyTorch versions
-
-        # Reinitialize the model
-        try:
-            from sentence_transformers import CrossEncoder
-        except ImportError:
-            raise ImportError(
-                "sentence-transformers is required for LocalSTCrossEncoder. "
-                "Install it with: pip install sentence-transformers"
-            )
-
-        # Determine device based on hardware availability
-        if self.force_cpu:
-            device = "cpu"
-        else:
-            # Wrap in try-except to gracefully handle any device detection issues
-            device = "cpu"  # Default to CPU
-            try:
-                has_gpu = torch.cuda.is_available() or (
-                    hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
-                )
-                if has_gpu:
-                    device = None  # Let sentence-transformers auto-detect GPU/MPS
-            except Exception as e:
-                logger.warning(f"Failed to detect GPU/MPS during reinit, falling back to CPU: {e}")
-
-        self._model = CrossEncoder(
-            self.model_name,
-            device=device,
-            model_kwargs={"low_cpu_mem_usage": False},
-        )
-
-        logger.info("Reranker: local provider reinitialized successfully")
-
-    def _predict_with_recovery(self, pairs: list[tuple[str, str]]) -> list[float]:
-        """
-        Predict with automatic recovery from XPC errors.
-
-        This runs synchronously in the thread pool.
-        """
-        max_retries = 1
-        for attempt in range(max_retries + 1):
-            try:
-                scores = self._model.predict(pairs, show_progress_bar=False)
-                return scores.tolist() if hasattr(scores, "tolist") else list(scores)
-            except Exception as e:
-                # Check if this is an XPC error (macOS daemon issue)
-                if self._is_xpc_error(e) and attempt < max_retries:
-                    logger.warning(f"XPC error detected in reranker (attempt {attempt + 1}): {e}")
-                    try:
-                        self._reinitialize_model_sync()
-                        logger.info("Reranker reinitialized successfully, retrying prediction")
-                        continue
-                    except Exception as reinit_error:
-                        logger.error(f"Failed to reinitialize reranker: {reinit_error}")
-                        raise Exception(f"Failed to recover from XPC error: {str(e)}")
-                else:
-                    # Not an XPC error or out of retries
-                    raise
+    def _predict_sync(self, pairs: list[tuple[str, str]]) -> list[float]:
+        """Synchronous prediction wrapper for thread pool execution."""
+        scores = self._model.predict(pairs, show_progress_bar=False)
+        return scores.tolist() if hasattr(scores, "tolist") else list(scores)
 
     async def predict(self, pairs: list[tuple[str, str]]) -> list[float]:
         """
         Score query-document pairs for relevance.
 
         Uses a dedicated thread pool with limited workers to prevent CPU thrashing.
-        Automatically recovers from XPC errors on macOS by reinitializing the model.
 
         Args:
             pairs: List of (query, document) tuples to score
@@ -294,7 +202,7 @@ class LocalSTCrossEncoder(CrossEncoderModel):
         loop = asyncio.get_event_loop()
         return await loop.run_in_executor(
             LocalSTCrossEncoder._executor,
-            self._predict_with_recovery,
+            self._predict_sync,
             pairs,
         )
```
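With the recovery path gone, `predict` reduces to the plain pattern of offloading a blocking model call to a bounded thread pool so it never stalls the event loop. A self-contained sketch of that pattern, with a trivial stand-in for the model:

```python
import asyncio
from concurrent.futures import ThreadPoolExecutor

_executor = ThreadPoolExecutor(max_workers=2)  # bounded, like the class-level executor

def _predict_sync(pairs: list[tuple[str, str]]) -> list[float]:
    # Stand-in for CrossEncoder.predict: any CPU-bound scoring function
    return [float(len(q) + len(d)) for q, d in pairs]

async def predict(pairs: list[tuple[str, str]]) -> list[float]:
    loop = asyncio.get_event_loop()
    return await loop.run_in_executor(_executor, _predict_sync, pairs)

print(asyncio.run(predict([("query", "document")])))  # [13.0]
```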
{hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/embeddings.py

```diff
@@ -166,82 +166,10 @@ class LocalSTEmbeddings(Embeddings):
         self._dimension = self._model.get_sentence_embedding_dimension()
         logger.info(f"Embeddings: local provider initialized (dim: {self._dimension})")
 
-    def _is_xpc_error(self, error: Exception) -> bool:
-        """
-        Check if an error is an XPC connection error (macOS daemon issue).
-
-        On macOS, long-running daemons can lose XPC connections to system services
-        when the process is idle for extended periods.
-        """
-        error_str = str(error).lower()
-        return "xpc_error_connection_invalid" in error_str or "xpc error" in error_str
-
-    def _reinitialize_model_sync(self) -> None:
-        """
-        Clear and reinitialize the embedding model synchronously.
-
-        This is used to recover from XPC errors on macOS where the
-        PyTorch/MPS backend loses its connection to system services.
-        """
-        logger.warning(f"Reinitializing embedding model {self.model_name} due to backend error")
-
-        # Clear existing model
-        self._model = None
-
-        # Force garbage collection to free resources
-        import gc
-
-        import torch
-
-        gc.collect()
-
-        # If using CUDA/MPS, clear the cache
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-            try:
-                torch.mps.empty_cache()
-            except AttributeError:
-                pass  # Method might not exist in all PyTorch versions
-
-        # Reinitialize the model (inline version of initialize() but synchronous)
-        try:
-            from sentence_transformers import SentenceTransformer
-        except ImportError:
-            raise ImportError(
-                "sentence-transformers is required for LocalSTEmbeddings. "
-                "Install it with: pip install sentence-transformers"
-            )
-
-        # Determine device based on hardware availability
-        if self.force_cpu:
-            device = "cpu"
-        else:
-            # Wrap in try-except to gracefully handle any device detection issues
-            device = "cpu"  # Default to CPU
-            try:
-                has_gpu = torch.cuda.is_available() or (
-                    hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
-                )
-                if has_gpu:
-                    device = None  # Let sentence-transformers auto-detect GPU/MPS
-            except Exception as e:
-                logger.warning(f"Failed to detect GPU/MPS during reinit, falling back to CPU: {e}")
-
-        self._model = SentenceTransformer(
-            self.model_name,
-            device=device,
-            model_kwargs={"low_cpu_mem_usage": False},
-        )
-
-        logger.info("Embeddings: local provider reinitialized successfully")
-
     def encode(self, texts: list[str]) -> list[list[float]]:
         """
         Generate embeddings for a list of texts.
 
-        Automatically recovers from XPC errors on macOS by reinitializing the model.
-
         Args:
             texts: List of text strings to encode
@@ -251,26 +179,8 @@ class LocalSTEmbeddings(Embeddings):
         if self._model is None:
             raise RuntimeError("Embeddings not initialized. Call initialize() first.")
 
-        max_retries = 1
-
-        for attempt in range(max_retries + 1):
-            try:
-                embeddings = self._model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
-                return [emb.tolist() for emb in embeddings]
-            except Exception as e:
-                # Check if this is an XPC error (macOS daemon issue)
-                if self._is_xpc_error(e) and attempt < max_retries:
-                    logger.warning(f"XPC error detected in embedding generation (attempt {attempt + 1}): {e}")
-                    try:
-                        self._reinitialize_model_sync()
-                        logger.info("Model reinitialized successfully, retrying embedding generation")
-                        continue
-                    except Exception as reinit_error:
-                        logger.error(f"Failed to reinitialize model: {reinit_error}")
-                        raise Exception(f"Failed to recover from XPC error: {str(e)}")
-                else:
-                    # Not an XPC error or out of retries
-                    raise
+        embeddings = self._model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
+        return [emb.tolist() for emb in embeddings]
 
 
 class RemoteTEIEmbeddings(Embeddings):
```
{hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/engine/retain/fact_extraction.py

```diff
@@ -782,12 +782,28 @@ Text:
     usage = TokenUsage()  # Track cumulative usage across retries
     for attempt in range(max_retries):
         try:
+            # Use retain-specific overrides if set, otherwise fall back to global LLM config
+            max_retries = (
+                config.retain_llm_max_retries if config.retain_llm_max_retries is not None else config.llm_max_retries
+            )
+            initial_backoff = (
+                config.retain_llm_initial_backoff
+                if config.retain_llm_initial_backoff is not None
+                else config.llm_initial_backoff
+            )
+            max_backoff = (
+                config.retain_llm_max_backoff if config.retain_llm_max_backoff is not None else config.llm_max_backoff
+            )
+
             extraction_response_json, call_usage = await llm_config.call(
                 messages=[{"role": "system", "content": prompt}, {"role": "user", "content": user_message}],
                 response_format=response_schema,
                 scope="memory_extract_facts",
                 temperature=0.1,
                 max_completion_tokens=config.retain_max_completion_tokens,
+                max_retries=max_retries,
+                initial_backoff=initial_backoff,
+                max_backoff=max_backoff,
                 skip_validation=True,  # Get raw JSON, we'll validate leniently
                 return_usage=True,
             )
```
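Each retain-time setting resolves from the per-operation override down to the global value. A hypothetical helper (not in the package) showing the resolution order the diff spells out inline:

```python
# Hypothetical helper: prefer the per-operation override, else the global value.
def resolve(override, default):
    return override if override is not None else default

# Mirrors the diff's resolution for the retain operation:
#   max_retries     = resolve(config.retain_llm_max_retries, config.llm_max_retries)
#   initial_backoff = resolve(config.retain_llm_initial_backoff, config.llm_initial_backoff)
#   max_backoff     = resolve(config.retain_llm_max_backoff, config.llm_max_backoff)
print(resolve(None, 10), resolve(5, 10))  # 10 5
```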
{hindsight_api-0.4.1 → hindsight_api-0.4.2}/hindsight_api/main.py

```diff
@@ -140,13 +140,6 @@ def main():
         args.port = DEFAULT_DAEMON_PORT
         args.host = "127.0.0.1"  # Only bind to localhost for security
 
-        # Force CPU mode for daemon to avoid macOS MPS/XPC issues
-        # MPS (Metal Performance Shaders) has unstable XPC connections in background processes
-        # that can cause assertion failures and process crashes at the C++ level
-        # (which Python exception handlers cannot catch)
-        os.environ["HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU"] = "1"
-        os.environ["HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"] = "1"
-
         # Check if another daemon is already running
         daemon_lock = DaemonLock()
         if not daemon_lock.acquire():
```
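With the unconditional force-CPU override removed, daemon-mode embedding and reranking are no longer pinned to CPU. Anyone who still wants the old behavior can set the same environment variables themselves before starting the daemon:

```python
import os

# Equivalent to what main.py used to set unconditionally in daemon mode
os.environ["HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU"] = "1"
os.environ["HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"] = "1"
```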
```diff
@@ -183,19 +176,37 @@ def main():
         llm_model=config.llm_model,
         llm_base_url=config.llm_base_url,
         llm_max_concurrent=config.llm_max_concurrent,
+        llm_max_retries=config.llm_max_retries,
+        llm_initial_backoff=config.llm_initial_backoff,
+        llm_max_backoff=config.llm_max_backoff,
         llm_timeout=config.llm_timeout,
         retain_llm_provider=config.retain_llm_provider,
         retain_llm_api_key=config.retain_llm_api_key,
         retain_llm_model=config.retain_llm_model,
         retain_llm_base_url=config.retain_llm_base_url,
+        retain_llm_max_concurrent=config.retain_llm_max_concurrent,
+        retain_llm_max_retries=config.retain_llm_max_retries,
+        retain_llm_initial_backoff=config.retain_llm_initial_backoff,
+        retain_llm_max_backoff=config.retain_llm_max_backoff,
+        retain_llm_timeout=config.retain_llm_timeout,
         reflect_llm_provider=config.reflect_llm_provider,
         reflect_llm_api_key=config.reflect_llm_api_key,
         reflect_llm_model=config.reflect_llm_model,
         reflect_llm_base_url=config.reflect_llm_base_url,
+        reflect_llm_max_concurrent=config.reflect_llm_max_concurrent,
+        reflect_llm_max_retries=config.reflect_llm_max_retries,
+        reflect_llm_initial_backoff=config.reflect_llm_initial_backoff,
+        reflect_llm_max_backoff=config.reflect_llm_max_backoff,
+        reflect_llm_timeout=config.reflect_llm_timeout,
         consolidation_llm_provider=config.consolidation_llm_provider,
         consolidation_llm_api_key=config.consolidation_llm_api_key,
         consolidation_llm_model=config.consolidation_llm_model,
         consolidation_llm_base_url=config.consolidation_llm_base_url,
+        consolidation_llm_max_concurrent=config.consolidation_llm_max_concurrent,
+        consolidation_llm_max_retries=config.consolidation_llm_max_retries,
+        consolidation_llm_initial_backoff=config.consolidation_llm_initial_backoff,
+        consolidation_llm_max_backoff=config.consolidation_llm_max_backoff,
+        consolidation_llm_timeout=config.consolidation_llm_timeout,
         embeddings_provider=config.embeddings_provider,
         embeddings_local_model=config.embeddings_local_model,
         embeddings_local_force_cpu=config.embeddings_local_force_cpu,
```
```diff
@@ -353,6 +364,7 @@ def main():
     # Start idle checker in daemon mode
     if idle_middleware is not None:
         # Start the idle checker in a background thread with its own event loop
+        import logging
         import threading
 
         def run_idle_checker():
```
```diff
@@ -363,8 +375,8 @@ def main():
                 loop = asyncio.new_event_loop()
                 asyncio.set_event_loop(loop)
                 loop.run_until_complete(idle_middleware._check_idle())
-            except Exception:
-                pass
+            except Exception as e:
+                logging.error(f"Idle checker error: {e}", exc_info=True)
 
         threading.Thread(target=run_idle_checker, daemon=True).start()
 
```
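The idle checker runs an async coroutine on its own event loop inside a daemon thread, so it lives independently of the main server loop. The pattern in isolation, with a trivial stand-in for the watchdog coroutine:

```python
import asyncio
import logging
import threading

async def check_idle() -> None:
    # Stand-in for IdleTimeoutMiddleware._check_idle: an async watchdog loop
    await asyncio.sleep(0.1)
    print("idle check ran")

def run_idle_checker() -> None:
    try:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(check_idle())
    except Exception as e:
        logging.error(f"Idle checker error: {e}", exc_info=True)

t = threading.Thread(target=run_idle_checker, daemon=True)
t.start()
t.join()  # joined here only so the example completes deterministically
```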