hindsight-api 0.4.0__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/.gitignore +4 -1
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/PKG-INFO +1 -1
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/__init__.py +1 -1
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/api/http.py +3 -2
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/config.py +29 -1
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/consolidation/consolidator.py +114 -47
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/consolidation/prompts.py +21 -13
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/cross_encoder.py +50 -24
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/embeddings.py +45 -19
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/memory_engine.py +11 -5
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/reflect/tools.py +1 -1
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/extensions/builtin/tenant.py +8 -5
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/main.py +12 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/pyproject.toml +1 -1
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/README.md +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/admin/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/admin/cli.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/README +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/env.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/script.py.mako +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/rename_personality_to_disposition.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/api/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/api/mcp.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/banner.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/daemon.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/consolidation/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/db_budget.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/db_utils.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/directives/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/directives/models.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/entity_resolver.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/interface.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/llm_wrapper.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/mental_models/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/mental_models/models.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/query_analyzer.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/reflect/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/reflect/agent.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/reflect/models.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/reflect/observations.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/reflect/prompts.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/reflect/tools_schema.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/response_models.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/bank_utils.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/chunk_storage.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/deduplication.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/embedding_processing.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/embedding_utils.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/entity_processing.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/fact_extraction.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/fact_storage.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/link_creation.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/link_utils.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/orchestrator.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/types.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/fusion.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/graph_retrieval.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/link_expansion_retrieval.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/mpfp_retrieval.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/reranking.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/retrieval.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/tags.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/temporal_extraction.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/think_utils.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/trace.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/tracer.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/types.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/task_backend.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/utils.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/extensions/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/extensions/base.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/extensions/builtin/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/extensions/context.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/extensions/http.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/extensions/loader.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/extensions/operation_validator.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/extensions/tenant.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/mcp_local.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/mcp_tools.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/metrics.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/migrations.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/models.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/pg0.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/server.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/worker/__init__.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/worker/main.py +0 -0
- {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/worker/poller.py +0 -0
{hindsight_api-0.4.0 → hindsight_api-0.4.1}/.gitignore

```diff
@@ -45,9 +45,12 @@ hindsight-docs/static/llms-full.txt
 
 hindsight-dev/benchmarks/locomo/results/
 hindsight-dev/benchmarks/longmemeval/results/
+hindsight-dev/benchmarks/consolidation/results/
+benchmarks/results/
 hindsight-cli/target
 hindsight-clients/rust/target
 .claude
 whats-next.md
 TASK.md
-
+# Changelog is now tracked in hindsight-docs/src/pages/changelog.md
+# CHANGELOG.md
```
{hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/api/http.py

```diff
@@ -1323,7 +1323,7 @@ class VersionResponse(BaseModel):
     model_config = ConfigDict(
         json_schema_extra={
             "example": {
-                "api_version": "
+                "api_version": "0.4.0",
                 "features": {
                     "observations": False,
                     "mcp": True,
@@ -1567,11 +1567,12 @@ def _register_routes(app: FastAPI):
         Returns version info and feature flags that can be used by clients
         to determine which capabilities are available.
         """
+        from hindsight_api import __version__
        from hindsight_api.config import get_config
 
        config = get_config()
        return VersionResponse(
-            api_version=
+            api_version=__version__,
            features=FeaturesInfo(
                observations=config.enable_observations,
                mcp=config.mcp_enabled,
```
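The version endpoint now derives `api_version` from the installed package instead of a hardcoded string. A hedged client-side probe follows: the response field names (`api_version`, `features.observations`, `features.mcp`) come from the schema above, but the route path, port, and base URL are assumptions, not confirmed by this diff.

```python
import httpx

def fetch_features(base_url: str) -> dict:
    # "/v1/version" is a hypothetical route; only the response fields are
    # grounded in the VersionResponse model shown in the diff above.
    resp = httpx.get(f"{base_url}/v1/version", timeout=5.0)
    resp.raise_for_status()
    data = resp.json()
    print(f"hindsight-api {data['api_version']}")
    return data.get("features", {})

features = fetch_features("http://localhost:8000")  # assumed port
if features.get("observations"):
    print("observation consolidation is enabled server-side")
```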
{hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/config.py

```diff
@@ -20,6 +20,7 @@ logger = logging.getLogger(__name__)
 
 # Environment variable names
 ENV_DATABASE_URL = "HINDSIGHT_API_DATABASE_URL"
+ENV_DATABASE_SCHEMA = "HINDSIGHT_API_DATABASE_SCHEMA"
 ENV_LLM_PROVIDER = "HINDSIGHT_API_LLM_PROVIDER"
 ENV_LLM_API_KEY = "HINDSIGHT_API_LLM_API_KEY"
 ENV_LLM_MODEL = "HINDSIGHT_API_LLM_MODEL"
@@ -46,6 +47,7 @@ ENV_CONSOLIDATION_LLM_BASE_URL = "HINDSIGHT_API_CONSOLIDATION_LLM_BASE_URL"
 
 ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
 ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
+ENV_EMBEDDINGS_LOCAL_FORCE_CPU = "HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU"
 ENV_EMBEDDINGS_TEI_URL = "HINDSIGHT_API_EMBEDDINGS_TEI_URL"
 ENV_EMBEDDINGS_OPENAI_API_KEY = "HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY"
 ENV_EMBEDDINGS_OPENAI_MODEL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL"
@@ -65,6 +67,7 @@ ENV_RERANKER_LITELLM_MODEL = "HINDSIGHT_API_RERANKER_LITELLM_MODEL"
 
 ENV_RERANKER_PROVIDER = "HINDSIGHT_API_RERANKER_PROVIDER"
 ENV_RERANKER_LOCAL_MODEL = "HINDSIGHT_API_RERANKER_LOCAL_MODEL"
+ENV_RERANKER_LOCAL_FORCE_CPU = "HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"
 ENV_RERANKER_LOCAL_MAX_CONCURRENT = "HINDSIGHT_API_RERANKER_LOCAL_MAX_CONCURRENT"
 ENV_RERANKER_TEI_URL = "HINDSIGHT_API_RERANKER_TEI_URL"
 ENV_RERANKER_TEI_BATCH_SIZE = "HINDSIGHT_API_RERANKER_TEI_BATCH_SIZE"
@@ -98,6 +101,7 @@ ENV_RETAIN_OBSERVATIONS_ASYNC = "HINDSIGHT_API_RETAIN_OBSERVATIONS_ASYNC"
 # Observations settings (consolidated knowledge from facts)
 ENV_ENABLE_OBSERVATIONS = "HINDSIGHT_API_ENABLE_OBSERVATIONS"
 ENV_CONSOLIDATION_BATCH_SIZE = "HINDSIGHT_API_CONSOLIDATION_BATCH_SIZE"
+ENV_CONSOLIDATION_MAX_TOKENS = "HINDSIGHT_API_CONSOLIDATION_MAX_TOKENS"
 
 # Optimization flags
 ENV_SKIP_LLM_VERIFICATION = "HINDSIGHT_API_SKIP_LLM_VERIFICATION"
@@ -125,6 +129,7 @@ ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"
 
 # Default values
 DEFAULT_DATABASE_URL = "pg0"
+DEFAULT_DATABASE_SCHEMA = "public"
 DEFAULT_LLM_PROVIDER = "openai"
 DEFAULT_LLM_MODEL = "gpt-5-mini"
 DEFAULT_LLM_MAX_CONCURRENT = 32
@@ -132,11 +137,13 @@ DEFAULT_LLM_TIMEOUT = 120.0  # seconds
 
 DEFAULT_EMBEDDINGS_PROVIDER = "local"
 DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
+DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU = False  # Force CPU mode for local embeddings (avoids MPS/XPC issues on macOS)
 DEFAULT_EMBEDDINGS_OPENAI_MODEL = "text-embedding-3-small"
 DEFAULT_EMBEDDING_DIMENSION = 384
 
 DEFAULT_RERANKER_PROVIDER = "local"
 DEFAULT_RERANKER_LOCAL_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
+DEFAULT_RERANKER_LOCAL_FORCE_CPU = False  # Force CPU mode for local reranker (avoids MPS/XPC issues on macOS)
 DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT = 4  # Limit concurrent CPU-bound reranking to prevent thrashing
 DEFAULT_RERANKER_TEI_BATCH_SIZE = 128
 DEFAULT_RERANKER_TEI_MAX_CONCURRENT = 8
@@ -177,6 +184,7 @@ DEFAULT_RETAIN_OBSERVATIONS_ASYNC = False  # Run observation generation async (a
 # Observations defaults (consolidated knowledge from facts)
 DEFAULT_ENABLE_OBSERVATIONS = True  # Observations enabled by default
 DEFAULT_CONSOLIDATION_BATCH_SIZE = 50  # Memories to load per batch (internal memory optimization)
+DEFAULT_CONSOLIDATION_MAX_TOKENS = 1024  # Max tokens for recall when finding related observations
 
 # Database migrations
 DEFAULT_RUN_MIGRATIONS_ON_STARTUP = True
```
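Taken together, 0.4.1 introduces four new configuration knobs, all read from the environment. A quick sketch of setting them from Python before the engine loads its config (names and defaults are taken verbatim from the diff; that the config is read once at startup is an assumption):

```python
import os

# New in 0.4.1; the values shown are the shipped defaults.
os.environ["HINDSIGHT_API_DATABASE_SCHEMA"] = "public"            # Postgres schema to use
os.environ["HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU"] = "false"  # "true"/"1" forces CPU
os.environ["HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"] = "false"    # "true"/"1" forces CPU
os.environ["HINDSIGHT_API_CONSOLIDATION_MAX_TOKENS"] = "1024"     # recall token budget
```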
```diff
@@ -270,6 +278,7 @@ class HindsightConfig:
 
     # Database
     database_url: str
+    database_schema: str
 
     # LLM (default, used as fallback for per-operation config)
     llm_provider: str
@@ -298,6 +307,7 @@ class HindsightConfig:
     # Embeddings
     embeddings_provider: str
     embeddings_local_model: str
+    embeddings_local_force_cpu: bool
     embeddings_tei_url: str | None
     embeddings_openai_base_url: str | None
     embeddings_cohere_base_url: str | None
@@ -305,6 +315,8 @@ class HindsightConfig:
     # Reranker
     reranker_provider: str
     reranker_local_model: str
+    reranker_local_force_cpu: bool
+    reranker_local_max_concurrent: int
     reranker_tei_url: str | None
     reranker_tei_batch_size: int
     reranker_tei_max_concurrent: int
@@ -336,6 +348,7 @@ class HindsightConfig:
     # Observations settings (consolidated knowledge from facts)
     enable_observations: bool
     consolidation_batch_size: int
+    consolidation_max_tokens: int
 
     # Optimization flags
     skip_llm_verification: bool
@@ -367,6 +380,7 @@ class HindsightConfig:
         return cls(
             # Database
             database_url=os.getenv(ENV_DATABASE_URL, DEFAULT_DATABASE_URL),
+            database_schema=os.getenv(ENV_DATABASE_SCHEMA, DEFAULT_DATABASE_SCHEMA),
             # LLM
             llm_provider=os.getenv(ENV_LLM_PROVIDER, DEFAULT_LLM_PROVIDER),
             llm_api_key=os.getenv(ENV_LLM_API_KEY),
@@ -390,12 +404,23 @@ class HindsightConfig:
             # Embeddings
             embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
             embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
+            embeddings_local_force_cpu=os.getenv(
+                ENV_EMBEDDINGS_LOCAL_FORCE_CPU, str(DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU)
+            ).lower()
+            in ("true", "1"),
             embeddings_tei_url=os.getenv(ENV_EMBEDDINGS_TEI_URL),
             embeddings_openai_base_url=os.getenv(ENV_EMBEDDINGS_OPENAI_BASE_URL) or None,
             embeddings_cohere_base_url=os.getenv(ENV_EMBEDDINGS_COHERE_BASE_URL) or None,
             # Reranker
             reranker_provider=os.getenv(ENV_RERANKER_PROVIDER, DEFAULT_RERANKER_PROVIDER),
             reranker_local_model=os.getenv(ENV_RERANKER_LOCAL_MODEL, DEFAULT_RERANKER_LOCAL_MODEL),
+            reranker_local_force_cpu=os.getenv(
+                ENV_RERANKER_LOCAL_FORCE_CPU, str(DEFAULT_RERANKER_LOCAL_FORCE_CPU)
+            ).lower()
+            in ("true", "1"),
+            reranker_local_max_concurrent=int(
+                os.getenv(ENV_RERANKER_LOCAL_MAX_CONCURRENT, str(DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT))
+            ),
             reranker_tei_url=os.getenv(ENV_RERANKER_TEI_URL),
             reranker_tei_batch_size=int(os.getenv(ENV_RERANKER_TEI_BATCH_SIZE, str(DEFAULT_RERANKER_TEI_BATCH_SIZE))),
             reranker_tei_max_concurrent=int(
```
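One subtlety in the flag parsing above: only the strings `true` and `1` (case-insensitive) count as true, so `str(False).lower()` ("false") correctly defaults the flag off, and unrecognized values like `yes` are silently treated as false. The same pattern in isolation:

```python
import os

def env_flag(name: str, default: bool = False) -> bool:
    # Mirrors config.py: only "true" or "1" (any case) enable the flag;
    # everything else, including "yes" or "on", reads as False.
    return os.getenv(name, str(default)).lower() in ("true", "1")

os.environ["HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"] = "1"
assert env_flag("HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU") is True
assert env_flag("HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU") is False  # unset -> default
```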
```diff
@@ -444,6 +469,9 @@ class HindsightConfig:
             consolidation_batch_size=int(
                 os.getenv(ENV_CONSOLIDATION_BATCH_SIZE, str(DEFAULT_CONSOLIDATION_BATCH_SIZE))
             ),
+            consolidation_max_tokens=int(
+                os.getenv(ENV_CONSOLIDATION_MAX_TOKENS, str(DEFAULT_CONSOLIDATION_MAX_TOKENS))
+            ),
             # Database migrations
             run_migrations_on_startup=os.getenv(ENV_RUN_MIGRATIONS_ON_STARTUP, "true").lower() == "true",
             # Database connection pool
@@ -515,7 +543,7 @@ class HindsightConfig:
 
     def log_config(self) -> None:
         """Log the current configuration (without sensitive values)."""
-        logger.info(f"Database: {self.database_url}")
+        logger.info(f"Database: {self.database_url} (schema: {self.database_schema})")
         logger.info(f"LLM: provider={self.llm_provider}, model={self.llm_model}")
         if self.retain_llm_provider or self.retain_llm_model:
             retain_provider = self.retain_llm_provider or self.llm_provider
```
{hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/consolidation/consolidator.py
RENAMED

```diff
@@ -639,28 +639,27 @@ async def _find_related_observations(
     request_context: "RequestContext",
 ) -> list[dict[str, Any]]:
     """
-    Find observations related to the given query using
+    Find observations related to the given query using optimized recall.
 
     IMPORTANT: We do NOT filter by tags here. Consolidation needs to see ALL
     potentially related observations regardless of scope, so the LLM can
     decide on tag routing (same scope update vs cross-scope create).
 
-
-
-    - BM25 text search (keyword matching)
-    - Entity-based retrieval (shared entities)
-    - Graph traversal (connected via entity links)
+    Uses max_tokens to naturally limit observations (no artificial count limit).
+    Includes source memories with dates for LLM context.
 
     Returns:
-        List of related observations with their tags
+        List of related observations with their tags, source memories, and dates
     """
-    # Use recall to find related observations
-    #
-
+    # Use recall to find related observations with token budget
+    # max_tokens naturally limits how many observations are returned
+    from ...config import get_config
+
+    config = get_config()
     recall_result = await memory_engine.recall_async(
         bank_id=bank_id,
         query=query,
-        max_tokens=
+        max_tokens=config.consolidation_max_tokens,  # Token budget for observations (configurable)
         fact_type=["observation"],  # Only retrieve observations
         request_context=request_context,
         _quiet=True,  # Suppress logging
@@ -668,43 +667,82 @@ async def _find_related_observations(
     )
 
     # If no observations returned, return empty list
-    # When fact_type=["observation"], results come back in `results` field
     if not recall_result.results:
         return []
 
-    #
+    # Batch fetch all observations in a single query (no artificial limit)
+    observation_ids = [uuid.UUID(obs.id) for obs in recall_result.results]
+
+    rows = await conn.fetch(
+        f"""
+        SELECT id, text, proof_count, history, tags, source_memory_ids, created_at, updated_at,
+               occurred_start, occurred_end, mentioned_at
+        FROM {fq_table("memory_units")}
+        WHERE id = ANY($1) AND bank_id = $2 AND fact_type = 'observation'
+        """,
+        observation_ids,
+        bank_id,
+    )
+
+    # Build results list preserving recall order
+    id_to_row = {row["id"]: row for row in rows}
     results = []
-    for obs in recall_result.results:
-        # Fetch full observation data from DB to get history, source_memory_ids, tags
-        row = await conn.fetchrow(
-            f"""
-            SELECT id, text, proof_count, history, tags, source_memory_ids, created_at, updated_at
-            FROM {fq_table("memory_units")}
-            WHERE id = $1 AND bank_id = $2 AND fact_type = 'observation'
-            """,
-            uuid.UUID(obs.id),
-            bank_id,
-        )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    for obs in recall_result.results:
+        obs_id = uuid.UUID(obs.id)
+        if obs_id not in id_to_row:
+            continue
+
+        row = id_to_row[obs_id]
+        history = row["history"]
+        if isinstance(history, str):
+            history = json.loads(history)
+        elif history is None:
+            history = []
+
+        # Fetch source memories to include their text and dates
+        source_memory_ids = row["source_memory_ids"] or []
+        source_memories = []
+
+        if source_memory_ids:
+            source_rows = await conn.fetch(
+                f"""
+                SELECT text, occurred_start, occurred_end, mentioned_at, event_date
+                FROM {fq_table("memory_units")}
+                WHERE id = ANY($1) AND bank_id = $2
+                ORDER BY created_at ASC
+                LIMIT 5
+                """,
+                source_memory_ids[:5],  # Limit to first 5 source memories for token efficiency
+                bank_id,
            )
 
+            for src_row in source_rows:
+                source_memories.append(
+                    {
+                        "text": src_row["text"],
+                        "occurred_start": src_row["occurred_start"],
+                        "occurred_end": src_row["occurred_end"],
+                        "mentioned_at": src_row["mentioned_at"],
+                        "event_date": src_row["event_date"],
+                    }
+                )
+
+        results.append(
+            {
+                "id": row["id"],
+                "text": row["text"],
+                "proof_count": row["proof_count"] or 1,
+                "tags": row["tags"] or [],
+                "source_memories": source_memories,
+                "occurred_start": row["occurred_start"],
+                "occurred_end": row["occurred_end"],
+                "mentioned_at": row["mentioned_at"],
+                "created_at": row["created_at"],
+                "updated_at": row["updated_at"],
+            }
+        )
+
     return results
 
 
```
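The substantive change here is replacing a per-observation `fetchrow` loop (a classic N+1 query pattern) with a single `id = ANY($1)` fetch, then restoring recall order in Python. The same pattern in isolation, as a sketch with asyncpg (the table and columns are illustrative, not the package's exact schema):

```python
import uuid
import asyncpg

async def fetch_preserving_order(conn: asyncpg.Connection, ids: list[uuid.UUID]) -> list:
    # One round-trip instead of len(ids) fetchrow() calls. Postgres does not
    # guarantee row order for "id = ANY($1)", so order is rebuilt via a dict.
    rows = await conn.fetch(
        "SELECT id, text FROM memory_units WHERE id = ANY($1)",  # illustrative table
        ids,
    )
    id_to_row = {row["id"]: row for row in rows}
    # Drop ids the query filtered out, just as the consolidator skips
    # rows that are not observations.
    return [id_to_row[i] for i in ids if i in id_to_row]
```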
```diff
@@ -732,14 +770,43 @@ async def _consolidate_with_llm(
     - {"action": "create", "text": "...", "reason": "..."}
     - [] if fact is purely ephemeral (no durable knowledge)
     """
-    # Format observations
+    # Format observations as JSON with source memories and dates
     if observations:
-
-
-
-
+        obs_list = []
+        for obs in observations:
+            obs_data = {
+                "id": str(obs["id"]),
+                "text": obs["text"],
+                "proof_count": obs["proof_count"],
+                "tags": obs["tags"],
+                "created_at": obs["created_at"].isoformat() if obs.get("created_at") else None,
+                "updated_at": obs["updated_at"].isoformat() if obs.get("updated_at") else None,
+            }
+
+            # Include temporal info if available
+            if obs.get("occurred_start"):
+                obs_data["occurred_start"] = obs["occurred_start"].isoformat()
+            if obs.get("occurred_end"):
+                obs_data["occurred_end"] = obs["occurred_end"].isoformat()
+            if obs.get("mentioned_at"):
+                obs_data["mentioned_at"] = obs["mentioned_at"].isoformat()
+
+            # Include source memories (up to 3 for brevity)
+            if obs.get("source_memories"):
+                obs_data["source_memories"] = [
+                    {
+                        "text": sm["text"],
+                        "event_date": sm["event_date"].isoformat() if sm.get("event_date") else None,
+                        "occurred_start": sm["occurred_start"].isoformat() if sm.get("occurred_start") else None,
+                    }
+                    for sm in obs["source_memories"][:3]  # Limit to 3 for token efficiency
+                ]
+
+            obs_list.append(obs_data)
+
+        observations_text = json.dumps(obs_list, indent=2)
     else:
-        observations_text = "
+        observations_text = "[]"
 
     # Only include mission section if mission is set and not the default
     mission_section = ""
```
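For reference, the serialization above yields an `observations_text` shaped roughly like the literal below. Every value here is invented for illustration; only the key names are grounded in the code.

```python
example_observations_payload = [
    {
        "id": "7b0c9a1e-0000-0000-0000-000000000000",  # invented
        "text": "User prefers morning meetings",        # invented
        "proof_count": 3,
        "tags": ["scope:example"],                      # invented
        "created_at": "2025-01-10T09:00:00",
        "updated_at": "2025-02-01T14:30:00",
        "occurred_start": "2025-01-05T00:00:00",
        "source_memories": [
            {
                "text": "Asked to move standup to 9am",  # invented
                "event_date": "2025-01-05T00:00:00",
                "occurred_start": None,
            }
        ],
    }
]
```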
{hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/consolidation/prompts.py

```diff
@@ -47,23 +47,31 @@ CONSOLIDATION_USER_PROMPT = """Analyze this new fact and consolidate into knowle
 {mission_section}
 NEW FACT: {fact_text}
 
-EXISTING OBSERVATIONS:
+EXISTING OBSERVATIONS (JSON array with source memories and dates):
 {observations_text}
 
-
-
-
-
-
+Each observation includes:
+- id: unique identifier for updating
+- text: the observation content
+- proof_count: number of supporting memories
+- tags: visibility scope (handled automatically)
+- created_at/updated_at: when observation was created/modified
+- occurred_start/occurred_end: temporal range of source facts
+- source_memories: array of supporting facts with their text and dates
 
-
+Instructions:
+1. Extract DURABLE KNOWLEDGE from the new fact (not ephemeral state)
+2. Review source_memories in existing observations to understand evidence
+3. Check dates to detect contradictions or updates
+4. Compare with observations:
+   - Same topic → UPDATE with learning_id
+   - New topic → CREATE new observation
+   - Purely ephemeral → return []
+
+Output JSON array of actions:
 [
-{{"action": "update", "learning_id": "uuid", "text": "updated
+{{"action": "update", "learning_id": "uuid-from-observations", "text": "updated knowledge", "reason": "..."}},
 {{"action": "create", "text": "new durable knowledge", "reason": "..."}}
 ]
 
-
-[]
-
-If no observations exist and fact contains durable knowledge:
-[{{"action": "create", "text": "durable knowledge text", "reason": "new topic"}}]"""
+Return [] if fact contains no durable knowledge."""
```
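The rewritten prompt pins the model to a strict contract: a JSON array of `update`/`create` actions, or `[]` for ephemeral facts. A defensive consumer-side parser, sketched (the function and its strictness are illustrative; only the action shape comes from the prompt):

```python
import json

def parse_consolidation_actions(raw: str) -> list[dict]:
    # Contract from CONSOLIDATION_USER_PROMPT: a JSON array where each item is
    # {"action": "update", "learning_id": ..., "text": ..., "reason": ...} or
    # {"action": "create", "text": ..., "reason": ...}; [] means ephemeral.
    actions = json.loads(raw)
    if not isinstance(actions, list):
        raise ValueError("expected a JSON array of actions")
    for action in actions:
        kind = action.get("action")
        if kind not in ("update", "create"):
            raise ValueError(f"unknown action: {kind!r}")
        if kind == "update" and "learning_id" not in action:
            raise ValueError("update action is missing learning_id")
    return actions
```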
{hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/cross_encoder.py

```diff
@@ -20,6 +20,7 @@ from ..config import (
     DEFAULT_RERANKER_FLASHRANK_CACHE_DIR,
     DEFAULT_RERANKER_FLASHRANK_MODEL,
     DEFAULT_RERANKER_LITELLM_MODEL,
+    DEFAULT_RERANKER_LOCAL_FORCE_CPU,
     DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT,
     DEFAULT_RERANKER_LOCAL_MODEL,
     DEFAULT_RERANKER_PROVIDER,
@@ -33,6 +34,7 @@ from ..config import (
     ENV_RERANKER_FLASHRANK_CACHE_DIR,
     ENV_RERANKER_FLASHRANK_MODEL,
     ENV_RERANKER_LITELLM_MODEL,
+    ENV_RERANKER_LOCAL_FORCE_CPU,
     ENV_RERANKER_LOCAL_MAX_CONCURRENT,
     ENV_RERANKER_LOCAL_MODEL,
     ENV_RERANKER_PROVIDER,
@@ -99,7 +101,7 @@ class LocalSTCrossEncoder(CrossEncoderModel):
     _executor: ThreadPoolExecutor | None = None
     _max_concurrent: int = 4  # Limit concurrent CPU-bound reranking calls
 
-    def __init__(self, model_name: str | None = None, max_concurrent: int = 4):
+    def __init__(self, model_name: str | None = None, max_concurrent: int = 4, force_cpu: bool = False):
         """
         Initialize local SentenceTransformers cross-encoder.
 
@@ -108,8 +110,11 @@ class LocalSTCrossEncoder(CrossEncoderModel):
                 Default: cross-encoder/ms-marco-MiniLM-L-6-v2
             max_concurrent: Maximum concurrent reranking calls (default: 2).
                 Higher values may cause CPU thrashing under load.
+            force_cpu: Force CPU mode (avoids MPS/XPC issues on macOS in daemon mode).
+                Default: False
         """
         self.model_name = model_name or DEFAULT_RERANKER_LOCAL_MODEL
+        self.force_cpu = force_cpu
         self._model = None
         LocalSTCrossEncoder._max_concurrent = max_concurrent
 
@@ -139,13 +144,23 @@ class LocalSTCrossEncoder(CrossEncoderModel):
         # after loading, which conflicts with accelerate's device_map handling.
         import torch
 
-        #
-
-
-        if has_gpu:
-            device = None  # Let sentence-transformers auto-detect GPU/MPS
-        else:
+        # Force CPU mode if configured (used in daemon mode to avoid MPS/XPC issues on macOS)
+        if self.force_cpu:
             device = "cpu"
+            logger.info("Reranker: forcing CPU mode (HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU=1)")
+        else:
+            # Check for GPU (CUDA) or Apple Silicon (MPS)
+            # Wrap in try-except to gracefully handle any device detection issues
+            # (e.g., in CI environments or when PyTorch is built without GPU support)
+            device = "cpu"  # Default to CPU
+            try:
+                has_gpu = torch.cuda.is_available() or (
+                    hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+                )
+                if has_gpu:
+                    device = None  # Let sentence-transformers auto-detect GPU/MPS
+            except Exception as e:
+                logger.warning(f"Failed to detect GPU/MPS, falling back to CPU: {e}")
 
         self._model = CrossEncoder(
             self.model_name,
```
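The guarded detection logic is worth seeing on its own: pin to CPU when forced, otherwise try CUDA then MPS, and treat any detection failure as CPU. The same decision in isolation (a sketch, not the package's exact code path):

```python
import torch

def pick_device(force_cpu: bool = False) -> str | None:
    # Returns "cpu" to pin the model, or None so sentence-transformers
    # auto-selects CUDA/MPS. Detection failures (e.g. a torch build without
    # GPU support, or a sandboxed macOS daemon) degrade to CPU, not a crash.
    if force_cpu:
        return "cpu"
    try:
        if torch.cuda.is_available() or (
            hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
        ):
            return None
    except Exception:
        pass
    return "cpu"
```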
```diff
@@ -211,12 +226,19 @@ class LocalSTCrossEncoder(CrossEncoderModel):
         )
 
         # Determine device based on hardware availability
-
-
-        if has_gpu:
-            device = None  # Let sentence-transformers auto-detect GPU/MPS
-        else:
+        if self.force_cpu:
             device = "cpu"
+        else:
+            # Wrap in try-except to gracefully handle any device detection issues
+            device = "cpu"  # Default to CPU
+            try:
+                has_gpu = torch.cuda.is_available() or (
+                    hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+                )
+                if has_gpu:
+                    device = None  # Let sentence-transformers auto-detect GPU/MPS
+            except Exception as e:
+                logger.warning(f"Failed to detect GPU/MPS during reinit, falling back to CPU: {e}")
 
         self._model = CrossEncoder(
             self.model_name,
@@ -873,29 +895,33 @@ class LiteLLMCrossEncoder(CrossEncoderModel):
 
 def create_cross_encoder_from_env() -> CrossEncoderModel:
     """
-    Create a CrossEncoderModel instance based on
+    Create a CrossEncoderModel instance based on configuration.
 
-
+    Reads configuration via get_config() to ensure consistency across the codebase.
 
     Returns:
         Configured CrossEncoderModel instance
     """
-
+    from ..config import get_config
+
+    config = get_config()
+    provider = config.reranker_provider.lower()
 
     if provider == "tei":
-        url =
+        url = config.reranker_tei_url
         if not url:
             raise ValueError(f"{ENV_RERANKER_TEI_URL} is required when {ENV_RERANKER_PROVIDER} is 'tei'")
-
-
-
+        return RemoteTEICrossEncoder(
+            base_url=url,
+            batch_size=config.reranker_tei_batch_size,
+            max_concurrent=config.reranker_tei_max_concurrent,
+        )
     elif provider == "local":
-
-
-
-
+        return LocalSTCrossEncoder(
+            model_name=config.reranker_local_model,
+            max_concurrent=config.reranker_local_max_concurrent,
+            force_cpu=config.reranker_local_force_cpu,
         )
-        return LocalSTCrossEncoder(model_name=model_name, max_concurrent=max_concurrent)
     elif provider == "cohere":
         api_key = os.environ.get(ENV_COHERE_API_KEY)
         if not api_key:
```