shellbrain 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app/__init__.py +1 -0
- app/__main__.py +7 -0
- app/boot/__init__.py +1 -0
- app/boot/admin_db.py +88 -0
- app/boot/config.py +14 -0
- app/boot/create_policy.py +52 -0
- app/boot/db.py +70 -0
- app/boot/embeddings.py +55 -0
- app/boot/home.py +45 -0
- app/boot/migrations.py +61 -0
- app/boot/read_policy.py +179 -0
- app/boot/repos.py +15 -0
- app/boot/retrieval.py +3 -0
- app/boot/thresholds.py +19 -0
- app/boot/update_policy.py +34 -0
- app/boot/use_cases.py +22 -0
- app/config/__init__.py +1 -0
- app/config/defaults/create_policy.yaml +7 -0
- app/config/defaults/read_policy.yaml +25 -0
- app/config/defaults/runtime.yaml +10 -0
- app/config/defaults/thresholds.yaml +3 -0
- app/config/defaults/update_policy.yaml +5 -0
- app/config/loader.py +58 -0
- app/core/__init__.py +1 -0
- app/core/contracts/__init__.py +1 -0
- app/core/contracts/errors.py +29 -0
- app/core/contracts/requests.py +211 -0
- app/core/contracts/responses.py +15 -0
- app/core/entities/__init__.py +1 -0
- app/core/entities/associations.py +58 -0
- app/core/entities/episodes.py +66 -0
- app/core/entities/evidence.py +29 -0
- app/core/entities/facts.py +30 -0
- app/core/entities/guidance.py +47 -0
- app/core/entities/identity.py +48 -0
- app/core/entities/memory.py +34 -0
- app/core/entities/runtime_context.py +19 -0
- app/core/entities/session_state.py +31 -0
- app/core/entities/telemetry.py +152 -0
- app/core/entities/utility.py +14 -0
- app/core/interfaces/__init__.py +1 -0
- app/core/interfaces/clock.py +12 -0
- app/core/interfaces/config.py +28 -0
- app/core/interfaces/embeddings.py +12 -0
- app/core/interfaces/idgen.py +11 -0
- app/core/interfaces/repos.py +279 -0
- app/core/interfaces/retrieval.py +20 -0
- app/core/interfaces/session_state_store.py +33 -0
- app/core/interfaces/unit_of_work.py +50 -0
- app/core/policies/__init__.py +1 -0
- app/core/policies/_shared/__init__.py +1 -0
- app/core/policies/_shared/executor.py +132 -0
- app/core/policies/_shared/side_effects.py +9 -0
- app/core/policies/create_policy/__init__.py +1 -0
- app/core/policies/create_policy/pipeline.py +96 -0
- app/core/policies/read_policy/__init__.py +1 -0
- app/core/policies/read_policy/bm25.py +114 -0
- app/core/policies/read_policy/context_pack_builder.py +140 -0
- app/core/policies/read_policy/expansion.py +132 -0
- app/core/policies/read_policy/fusion_rrf.py +34 -0
- app/core/policies/read_policy/lexical_query.py +101 -0
- app/core/policies/read_policy/pipeline.py +93 -0
- app/core/policies/read_policy/scenario_lift.py +11 -0
- app/core/policies/read_policy/scoring.py +61 -0
- app/core/policies/read_policy/seed_retrieval.py +54 -0
- app/core/policies/read_policy/utility_prior.py +11 -0
- app/core/policies/update_policy/__init__.py +1 -0
- app/core/policies/update_policy/pipeline.py +80 -0
- app/core/use_cases/__init__.py +1 -0
- app/core/use_cases/build_guidance.py +85 -0
- app/core/use_cases/create_memory.py +26 -0
- app/core/use_cases/manage_session_state.py +159 -0
- app/core/use_cases/read_memory.py +21 -0
- app/core/use_cases/record_episode_sync_telemetry.py +19 -0
- app/core/use_cases/record_operation_telemetry.py +32 -0
- app/core/use_cases/sync_episode.py +162 -0
- app/core/use_cases/update_memory.py +40 -0
- app/migrations/__init__.py +1 -0
- app/migrations/env.py +65 -0
- app/migrations/versions/20260226_0001_initial_schema.py +232 -0
- app/migrations/versions/20260312_0002_add_hard_invariants.py +60 -0
- app/migrations/versions/20260312_0003_drop_create_confidence.py +40 -0
- app/migrations/versions/20260313_0004_episode_sync_hardening.py +71 -0
- app/migrations/versions/20260313_0005_evidence_episode_event_refs.py +45 -0
- app/migrations/versions/20260318_0006_usage_telemetry_schema.py +175 -0
- app/migrations/versions/20260319_0007_identity_session_guidance.py +49 -0
- app/migrations/versions/20260320_0008_instance_metadata_and_backup_safety.py +31 -0
- app/migrations/versions/__init__.py +1 -0
- app/periphery/__init__.py +1 -0
- app/periphery/admin/__init__.py +1 -0
- app/periphery/admin/backup.py +360 -0
- app/periphery/admin/destructive_guard.py +32 -0
- app/periphery/admin/doctor.py +192 -0
- app/periphery/admin/init.py +996 -0
- app/periphery/admin/instance_guard.py +211 -0
- app/periphery/admin/machine_state.py +354 -0
- app/periphery/admin/privileges.py +42 -0
- app/periphery/admin/repo_state.py +266 -0
- app/periphery/admin/restore.py +30 -0
- app/periphery/cli/__init__.py +1 -0
- app/periphery/cli/handlers.py +830 -0
- app/periphery/cli/hydration.py +119 -0
- app/periphery/cli/main.py +710 -0
- app/periphery/cli/presenter_json.py +10 -0
- app/periphery/cli/schema_validation.py +201 -0
- app/periphery/db/__init__.py +1 -0
- app/periphery/db/engine.py +10 -0
- app/periphery/db/models/__init__.py +1 -0
- app/periphery/db/models/associations.py +55 -0
- app/periphery/db/models/episodes.py +55 -0
- app/periphery/db/models/evidence.py +19 -0
- app/periphery/db/models/experiences.py +33 -0
- app/periphery/db/models/instance_metadata.py +17 -0
- app/periphery/db/models/memories.py +39 -0
- app/periphery/db/models/metadata.py +6 -0
- app/periphery/db/models/registry.py +18 -0
- app/periphery/db/models/telemetry.py +174 -0
- app/periphery/db/models/utility.py +19 -0
- app/periphery/db/models/views.py +154 -0
- app/periphery/db/repos/__init__.py +1 -0
- app/periphery/db/repos/relational/__init__.py +1 -0
- app/periphery/db/repos/relational/associations_repo.py +117 -0
- app/periphery/db/repos/relational/episodes_repo.py +188 -0
- app/periphery/db/repos/relational/evidence_repo.py +82 -0
- app/periphery/db/repos/relational/experiences_repo.py +41 -0
- app/periphery/db/repos/relational/memories_repo.py +99 -0
- app/periphery/db/repos/relational/read_policy_repo.py +202 -0
- app/periphery/db/repos/relational/telemetry_repo.py +161 -0
- app/periphery/db/repos/relational/utility_repo.py +30 -0
- app/periphery/db/repos/semantic/__init__.py +1 -0
- app/periphery/db/repos/semantic/keyword_retrieval_repo.py +63 -0
- app/periphery/db/repos/semantic/semantic_retrieval_repo.py +111 -0
- app/periphery/db/session.py +10 -0
- app/periphery/db/uow.py +75 -0
- app/periphery/embeddings/__init__.py +1 -0
- app/periphery/embeddings/local_provider.py +35 -0
- app/periphery/embeddings/query_vector_search.py +18 -0
- app/periphery/episodes/__init__.py +1 -0
- app/periphery/episodes/claude_code.py +387 -0
- app/periphery/episodes/codex.py +423 -0
- app/periphery/episodes/launcher.py +66 -0
- app/periphery/episodes/normalization.py +31 -0
- app/periphery/episodes/poller.py +299 -0
- app/periphery/episodes/source_discovery.py +66 -0
- app/periphery/episodes/tool_filter.py +165 -0
- app/periphery/identity/__init__.py +1 -0
- app/periphery/identity/claude_hook_install.py +67 -0
- app/periphery/identity/claude_runtime.py +83 -0
- app/periphery/identity/codex_runtime.py +32 -0
- app/periphery/identity/compatibility.py +38 -0
- app/periphery/identity/resolver.py +163 -0
- app/periphery/session_state/__init__.py +1 -0
- app/periphery/session_state/file_store.py +100 -0
- app/periphery/telemetry/__init__.py +33 -0
- app/periphery/telemetry/operation_summary.py +299 -0
- app/periphery/telemetry/session_selection.py +156 -0
- app/periphery/telemetry/sync_summary.py +65 -0
- app/periphery/validation/__init__.py +1 -0
- app/periphery/validation/integrity_validation.py +253 -0
- app/periphery/validation/semantic_validation.py +94 -0
- shellbrain-0.1.0.dist-info/METADATA +130 -0
- shellbrain-0.1.0.dist-info/RECORD +165 -0
- shellbrain-0.1.0.dist-info/WHEEL +5 -0
- shellbrain-0.1.0.dist-info/entry_points.txt +2 -0
- shellbrain-0.1.0.dist-info/top_level.txt +1 -0
app/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""This package contains the shellbrain system application code."""
|
app/__main__.py
ADDED
app/boot/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""This package contains factory functions that wire core to periphery."""
|
app/boot/admin_db.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Boot helpers for privileged admin database actions and safety settings."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from app.boot.config import get_config_provider
|
|
9
|
+
from app.boot.home import get_machine_backups_dir
|
|
10
|
+
from app.periphery.admin.machine_state import try_load_machine_config
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_admin_db_dsn() -> str:
    """Resolve the privileged admin DSN from environment-backed runtime config.

    Prefers the DSN stored in the machine config; otherwise falls back to the
    environment variable named by ``runtime.database.admin_dsn_env``.

    Raises:
        RuntimeError: when the machine config is unreadable or the DSN cannot
            be resolved from runtime configuration.
    """
    config, error = try_load_machine_config()
    if error:
        raise RuntimeError(
            "Shellbrain machine config is unreadable. Rerun `shellbrain init` to repair it."
        )
    if config is not None:
        return config.database.admin_dsn

    # No machine config: fall back to the env var named in runtime YAML.
    db_section = get_config_provider().get_runtime().get("database")
    if not isinstance(db_section, dict):
        raise RuntimeError("runtime.database must be configured")
    env_name = db_section.get("admin_dsn_env")
    if not (isinstance(env_name, str) and env_name):
        raise RuntimeError("runtime.database.admin_dsn_env must be configured")
    resolved = os.getenv(env_name)
    if not resolved:
        raise RuntimeError(f"{env_name} is not set")
    return resolved
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get_optional_admin_db_dsn() -> str | None:
    """Resolve the privileged admin DSN when present, otherwise return None.

    Mirrors :func:`get_admin_db_dsn` but degrades every failure mode to None
    instead of raising.
    """
    config, error = try_load_machine_config()
    if error:
        return None
    if config is not None:
        return config.database.admin_dsn

    db_section = get_config_provider().get_runtime().get("database")
    if not isinstance(db_section, dict):
        return None
    env_name = db_section.get("admin_dsn_env")
    if isinstance(env_name, str) and env_name:
        return os.getenv(env_name)
    return None
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def get_backup_dir() -> Path:
    """Resolve the on-disk backup directory, defaulting outside the repo tree.

    Resolution order: machine config's ``backups.root``, then the
    ``SHELLBRAIN_BACKUP_DIR`` env var, then the machine-owned default.
    """
    config, error = try_load_machine_config()
    if error:
        # Unreadable config: fall back to the machine default rather than fail.
        return get_machine_backups_dir()
    if config is not None:
        return Path(config.backups.root).expanduser().resolve()
    fallback = os.getenv("SHELLBRAIN_BACKUP_DIR", str(get_machine_backups_dir()))
    return Path(fallback).expanduser().resolve()
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def get_backup_mirror_dir() -> Path | None:
|
|
68
|
+
"""Resolve the optional mirrored backup directory."""
|
|
69
|
+
|
|
70
|
+
configured = os.getenv("SHELLBRAIN_BACKUP_MIRROR_DIR")
|
|
71
|
+
if not configured:
|
|
72
|
+
return None
|
|
73
|
+
return Path(configured).expanduser().resolve()
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def should_fail_on_unsafe_app_role() -> bool:
    """Return whether app commands should fail instead of warning on unsafe DB roles.

    Defaults to True (fail closed); only explicit opt-out values ("0", "false",
    "no", "off", case-insensitive) disable it.
    """
    raw = os.getenv("SHELLBRAIN_FAIL_ON_UNSAFE_DB_ROLE")
    if raw is None:
        return True
    normalized = raw.strip().lower()
    if not normalized:
        return True
    return normalized not in {"0", "false", "no", "off"}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def get_instance_mode_default() -> str:
    """Resolve the default instance mode used when stamping metadata for the current DB.

    Reads ``SHELLBRAIN_INSTANCE_MODE``; a blank or unset value yields "live".
    """
    mode = os.getenv("SHELLBRAIN_INSTANCE_MODE", "live").strip().lower()
    return mode if mode else "live"
|
app/boot/config.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""This module defines boot-time helpers that load YAML-backed configuration providers."""
|
|
2
|
+
|
|
3
|
+
from functools import lru_cache
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from app.config.loader import YamlConfigProvider
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@lru_cache(maxsize=1)
def get_config_provider() -> YamlConfigProvider:
    """Return the process-wide YAML configuration provider (cached singleton).

    The provider is rooted at the packaged ``app/config/defaults`` directory.
    """
    package_root = Path(__file__).resolve().parents[1]
    return YamlConfigProvider(package_root / "config" / "defaults")
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""Boot-time helpers for normalized create-policy settings."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from app.boot.config import get_config_provider
|
|
6
|
+
from app.core.contracts.errors import ErrorCode, ErrorDetail
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
_SUPPORTED_GATES = ("schema", "semantic", "integrity")
|
|
10
|
+
_SUPPORTED_SCOPES = ("repo", "global")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_create_policy_settings() -> dict[str, Any]:
    """Return normalized create-policy settings from YAML config.

    Validates that the configured gates are a non-empty subset of the
    supported gates (and include "schema"), and that the default scope is a
    supported value.

    Raises:
        ValueError: on any unsupported or malformed setting.
    """
    policy = get_config_provider().get_create_policy()

    raw_gates = policy.get("gates")
    if not isinstance(raw_gates, list) or not raw_gates:
        raise ValueError("create_policy.gates must be a non-empty list")
    gates: list[str] = []
    for gate in raw_gates:
        name = str(gate)
        if name in _SUPPORTED_GATES:
            gates.append(name)
    # Any dropped entry means an unsupported gate was configured.
    if len(gates) != len(raw_gates):
        raise ValueError("create_policy.gates contains unsupported values")
    if "schema" not in gates:
        raise ValueError("create_policy.gates must include schema")

    raw_defaults = policy.get("defaults")
    if not isinstance(raw_defaults, dict):
        raise ValueError("create_policy.defaults must be a mapping")
    scope = raw_defaults.get("scope")
    if not isinstance(scope, str) or scope not in _SUPPORTED_SCOPES:
        raise ValueError("create_policy.defaults.scope must be repo or global")

    return {"gates": gates, "defaults": {"scope": scope}}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def get_create_hydration_defaults() -> dict[str, Any]:
    """Return normalized create defaults used by CLI hydration.

    Returns a fresh copy so callers can mutate it safely.
    """
    settings = get_create_policy_settings()
    return dict(settings["defaults"])
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def validate_create_policy_settings() -> list[ErrorDetail]:
    """Return structured config errors for unsupported create-policy settings.

    Runs the normalizing loader and converts any ValueError into a single
    ErrorDetail; returns an empty list when the config is valid.
    """
    try:
        get_create_policy_settings()
    except ValueError as exc:
        # The exception message already names the offending key; report the
        # policy root instead of hard-coding "gates" — the failure may equally
        # concern create_policy.defaults.scope.
        return [ErrorDetail(code=ErrorCode.INTERNAL_ERROR, message=str(exc), field="create_policy")]
    return []
|
app/boot/db.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""This module defines boot-time factory helpers for database engine and sessions."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from app.boot.config import get_config_provider
|
|
7
|
+
from app.periphery.admin.machine_state import try_load_machine_config
|
|
8
|
+
from app.periphery.db.engine import get_engine
|
|
9
|
+
from app.periphery.db.session import get_session_factory
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def get_db_dsn() -> str:
    """Resolve the application database DSN from environment configuration.

    Prefers the machine config's ``database.app_dsn``; otherwise reads the env
    var named by ``runtime.database.dsn_env``.

    Raises:
        RuntimeError: when the machine config is unreadable or the DSN cannot
            be resolved.
    """
    config, error = try_load_machine_config()
    if error:
        raise RuntimeError(
            "Shellbrain machine config is unreadable. Rerun `shellbrain init` to repair it."
        )
    if config is not None:
        return config.database.app_dsn

    # No machine config: fall back to the env var named in runtime YAML.
    db_section = get_config_provider().get_runtime().get("database")
    if not isinstance(db_section, dict):
        raise RuntimeError("runtime.database must be configured")
    env_name = db_section.get("dsn_env")
    if not (isinstance(env_name, str) and env_name):
        raise RuntimeError("runtime.database.dsn_env must be configured")
    resolved = os.getenv(env_name)
    if not resolved:
        raise RuntimeError(f"{env_name} is not set")
    return resolved
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def get_optional_db_dsn() -> str | None:
    """Resolve the application DSN when present, otherwise return None.

    Mirrors :func:`get_db_dsn` but degrades every failure mode to None.
    """
    config, error = try_load_machine_config()
    if error:
        return None
    if config is not None:
        return config.database.app_dsn

    db_section = get_config_provider().get_runtime().get("database")
    if not isinstance(db_section, dict):
        return None
    env_name = db_section.get("dsn_env")
    if isinstance(env_name, str) and env_name:
        return os.getenv(env_name)
    return None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def get_engine_instance():
    """Build a shared SQLAlchemy engine bound to the resolved application DSN."""
    dsn = get_db_dsn()
    return get_engine(dsn)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def get_session_factory_instance():
    """Build a reusable SQLAlchemy session factory for the application engine."""
    engine = get_engine_instance()
    return get_session_factory(engine)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def get_defaults_dir() -> Path:
    """Return the path to bundled YAML default configuration files."""
    app_root = Path(__file__).resolve().parents[1]
    return app_root / "config" / "defaults"
|
app/boot/embeddings.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""This module defines boot-time wiring for embedding provider construction."""
|
|
2
|
+
|
|
3
|
+
from app.boot.home import get_machine_models_dir
|
|
4
|
+
from app.boot.config import get_config_provider
|
|
5
|
+
from app.core.interfaces.embeddings import IEmbeddingProvider
|
|
6
|
+
from app.periphery.admin.machine_state import load_machine_config
|
|
7
|
+
from app.periphery.embeddings.local_provider import SentenceTransformersEmbeddingProvider
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _get_embedding_config() -> dict:
    """Return the ``runtime.embeddings`` mapping, validating its presence.

    Raises:
        ValueError: when the section is missing or not a mapping.
    """
    embeddings = get_config_provider().get_runtime().get("embeddings")
    if not isinstance(embeddings, dict):
        raise ValueError("runtime.embeddings must be configured")
    return embeddings
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_embedding_model_name() -> str:
    """Resolve the model name persisted alongside embedding vectors.

    Raises:
        ValueError: when provider/model are unset or the provider is unsupported.
    """
    config = _get_embedding_config()
    provider_name = config.get("provider")
    model_name = config.get("model")
    if not (isinstance(provider_name, str) and provider_name):
        raise ValueError("runtime.embeddings.provider must be configured")
    if not (isinstance(model_name, str) and model_name):
        raise ValueError("runtime.embeddings.model must be configured")
    if provider_name != "sentence_transformers":
        raise ValueError(f"Unsupported embedding provider: {provider_name}")
    return model_name
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def get_embedding_provider() -> IEmbeddingProvider:
    """Construct the configured local embedding provider.

    Validates the runtime embedding settings, then — when a machine config is
    present — requires the embeddings readiness state to be "ready" and uses
    its cache path; otherwise the machine-owned models directory is used.

    Raises:
        ValueError: for missing or unsupported provider/model settings.
        RuntimeError: when the machine config reports embeddings not ready.
    """
    config = _get_embedding_config()
    provider_name = config.get("provider")
    model_name = config.get("model")
    if not (isinstance(provider_name, str) and provider_name):
        raise ValueError("runtime.embeddings.provider must be configured")
    if not (isinstance(model_name, str) and model_name):
        raise ValueError("runtime.embeddings.model must be configured")
    if provider_name != "sentence_transformers":
        raise ValueError(f"Unsupported embedding provider: {provider_name}")

    machine_config = load_machine_config()
    if machine_config is None:
        cache_folder = str(get_machine_models_dir())
    else:
        if machine_config.embeddings.readiness_state != "ready":
            raise RuntimeError(
                "Shellbrain embeddings are not ready. Rerun `shellbrain init` to finish model setup."
            )
        cache_folder = machine_config.embeddings.cache_path
    return SentenceTransformersEmbeddingProvider(model=model_name, cache_folder=cache_folder)
|
app/boot/home.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Helpers for locating Shellbrain machine-owned runtime directories."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_shellbrain_home() -> Path:
    """Return the machine-owned Shellbrain home root.

    Honors ``SHELLBRAIN_HOME`` when set; defaults to ``~/.shellbrain``.
    """
    override = os.getenv("SHELLBRAIN_HOME")
    target = override if override else "~/.shellbrain"
    return Path(target).expanduser().resolve()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_machine_config_path() -> Path:
    """Return the machine configuration file path (``config.toml`` under home)."""
    home = get_shellbrain_home()
    return home / "config.toml"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_machine_lock_path() -> Path:
    """Return the machine-scoped init lock path (``init.lock`` under home)."""
    home = get_shellbrain_home()
    return home / "init.lock"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def get_machine_models_dir() -> Path:
    """Return the machine-owned embedding model cache path (``models`` under home)."""
    home = get_shellbrain_home()
    return home / "models"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def get_machine_backups_dir() -> Path:
    """Return the machine-owned default backup directory (``backups`` under home)."""
    home = get_shellbrain_home()
    return home / "backups"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_machine_postgres_data_dir() -> Path:
    """Return the managed Postgres bind-mounted data directory (``postgres-data`` under home)."""
    home = get_shellbrain_home()
    return home / "postgres-data"
|
app/boot/migrations.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Packaged Alembic bootstrap helpers for installed-shellbrain database migrations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from importlib.resources import as_file, files
|
|
6
|
+
|
|
7
|
+
from alembic import command
|
|
8
|
+
from alembic.config import Config
|
|
9
|
+
|
|
10
|
+
from app.boot.admin_db import get_admin_db_dsn, get_backup_dir, get_backup_mirror_dir, get_instance_mode_default
|
|
11
|
+
from app.boot.db import get_optional_db_dsn
|
|
12
|
+
from app.periphery.admin.destructive_guard import backup_and_verify_before_destructive_action
|
|
13
|
+
from app.periphery.admin.instance_guard import ensure_instance_metadata, fetch_instance_metadata
|
|
14
|
+
from app.periphery.admin.privileges import reconcile_app_role_privileges
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def upgrade_database(revision: str = "head") -> None:
    """Apply packaged Alembic migrations to the configured database.

    Order of operations matters here:
      1. If Shellbrain tables already exist, take (and verify) a backup before
         anything destructive can run.
      2. Run Alembic ``upgrade`` against the admin DSN using the packaged
         migrations directory.
      3. Stamp instance metadata if the database has none yet.
      4. Reconcile the unprivileged app role's grants when an app DSN exists.

    Args:
        revision: Alembic target revision; defaults to "head".
    """

    config = Config()
    admin_dsn = get_admin_db_dsn()
    # Safety guard: only back up when the DB already holds Shellbrain objects —
    # a fresh database has nothing worth snapshotting.
    if _database_has_shellbrain_objects(admin_dsn):
        backup_and_verify_before_destructive_action(
            admin_dsn=admin_dsn,
            backup_root=get_backup_dir(),
            mirror_root=get_backup_mirror_dir(),
        )
    config.set_main_option("sqlalchemy.url", admin_dsn)
    # as_file materializes the packaged migrations on disk (they may live in a
    # zip/wheel); the upgrade must run while that path is still valid.
    with as_file(files("app").joinpath("migrations")) as migrations_path:
        config.set_main_option("script_location", str(migrations_path))
        command.upgrade(config, revision)
    # Stamp metadata only once — an existing row is left untouched.
    if fetch_instance_metadata(admin_dsn) is None:
        ensure_instance_metadata(
            admin_dsn,
            instance_mode=get_instance_mode_default(),
            created_by="app.admin.migrate",
            notes="Stamped by packaged migration runner.",
        )
    # Privilege reconciliation is best-effort: skipped when no app DSN resolves.
    app_dsn = get_optional_db_dsn()
    if app_dsn:
        reconcile_app_role_privileges(admin_dsn=admin_dsn, app_dsn=app_dsn)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _database_has_shellbrain_objects(admin_dsn: str) -> bool:
    """Return whether the target database already contains Shellbrain-managed tables."""

    # Imported lazily so the module stays importable without psycopg installed.
    import psycopg

    # psycopg wants a plain libpq DSN, not SQLAlchemy's "+psycopg" driver suffix.
    plain_dsn = admin_dsn.replace("+psycopg", "")
    with psycopg.connect(plain_dsn) as conn:
        with conn.cursor() as cur:
            cur.execute(
                """
                SELECT EXISTS (
                    SELECT 1
                    FROM information_schema.tables
                    WHERE table_schema = 'public'
                    AND table_name IN ('memories', 'episodes', 'episode_events', 'operation_invocations')
                )
                """
            )
            row = cur.fetchone()
    return bool(row[0])
|
app/boot/read_policy.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
"""Boot-time helpers for resolving YAML-backed read-policy settings."""
|
|
2
|
+
|
|
3
|
+
from copy import deepcopy
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from app.boot.config import get_config_provider
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
_SUPPORTED_MODES = ("targeted", "ambient")
|
|
10
|
+
_SUPPORTED_BUCKETS = ("direct", "explicit", "implicit")
|
|
11
|
+
_EXPAND_INT_FIELDS = ("semantic_hops", "max_association_depth")
|
|
12
|
+
_EXPAND_BOOL_FIELDS = (
|
|
13
|
+
"include_problem_links",
|
|
14
|
+
"include_fact_update_links",
|
|
15
|
+
"include_association_links",
|
|
16
|
+
)
|
|
17
|
+
_EXPAND_FLOAT_FIELDS = ("min_association_strength",)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _require_mapping(value: Any, *, field: str) -> dict[str, Any]:
|
|
21
|
+
"""Require one config node to be a mapping."""
|
|
22
|
+
|
|
23
|
+
if not isinstance(value, dict):
|
|
24
|
+
raise ValueError(f"{field} must be a mapping")
|
|
25
|
+
return value
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _require_bool(mapping: dict[str, Any], key: str, *, field: str) -> bool:
|
|
29
|
+
"""Require one config value to be a boolean."""
|
|
30
|
+
|
|
31
|
+
value = mapping.get(key)
|
|
32
|
+
if not isinstance(value, bool):
|
|
33
|
+
raise ValueError(f"{field}.{key} must be a boolean")
|
|
34
|
+
return value
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _require_int(mapping: dict[str, Any], key: str, *, field: str) -> int:
|
|
38
|
+
"""Require one config value to be an integer."""
|
|
39
|
+
|
|
40
|
+
value = mapping.get(key)
|
|
41
|
+
if isinstance(value, bool) or not isinstance(value, int):
|
|
42
|
+
raise ValueError(f"{field}.{key} must be an integer")
|
|
43
|
+
return int(value)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _require_float(mapping: dict[str, Any], key: str, *, field: str) -> float:
|
|
47
|
+
"""Require one config value to be numeric."""
|
|
48
|
+
|
|
49
|
+
value = mapping.get(key)
|
|
50
|
+
if isinstance(value, bool) or not isinstance(value, (int, float)):
|
|
51
|
+
raise ValueError(f"{field}.{key} must be numeric")
|
|
52
|
+
return float(value)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _require_mode(value: Any, *, field: str) -> str:
    """Require one config value to be a supported read mode string."""
    if isinstance(value, str) and value in _SUPPORTED_MODES:
        return value
    raise ValueError(f"{field} must be one of: {', '.join(_SUPPORTED_MODES)}")
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def get_read_settings() -> dict[str, Any]:
    """Return normalized read settings from YAML-backed runtime and policy config.

    Pulls sections from two config files — ``read_policy`` (limits, expansion,
    quotas, weights, fusion) and ``runtime.cli`` (CLI defaults) — validating
    every field's type and raising ValueError with a dotted field path on any
    malformed value.

    Returns:
        A dict with keys ``default_mode``, ``include_global``,
        ``limits_by_mode``, ``expand``, ``quotas_by_mode``, and ``retrieval``.

    Raises:
        ValueError: when any required section or field is missing or mistyped.
    """

    config_provider = get_config_provider()
    # Validate the top-level sections up front so error messages carry paths.
    read_policy = _require_mapping(config_provider.get_read_policy(), field="read_policy")
    runtime = _require_mapping(config_provider.get_runtime(), field="runtime")
    cli_defaults = _require_mapping(runtime.get("cli"), field="runtime.cli")
    limits = _require_mapping(read_policy.get("limits"), field="read_policy.limits")
    expansion = _require_mapping(read_policy.get("expansion"), field="read_policy.expansion")
    quotas = _require_mapping(read_policy.get("quotas"), field="read_policy.quotas")
    weights = _require_mapping(read_policy.get("weights"), field="read_policy.weights")
    fusion = _require_mapping(read_policy.get("fusion"), field="read_policy.fusion")

    settings = {
        "default_mode": _require_mode(cli_defaults.get("default_mode"), field="runtime.cli.default_mode"),
        "include_global": _require_bool(cli_defaults, "include_global", field="runtime.cli"),
        # One required result-count limit per supported mode.
        "limits_by_mode": {
            mode: _require_int(limits, mode, field="read_policy.limits")
            for mode in _SUPPORTED_MODES
        },
        # Expansion settings split by type: ints, bools, then floats.
        "expand": {
            **{
                key: _require_int(expansion, key, field="read_policy.expansion")
                for key in _EXPAND_INT_FIELDS
            },
            **{
                key: _require_bool(expansion, key, field="read_policy.expansion")
                for key in _EXPAND_BOOL_FIELDS
            },
            **{
                key: _require_float(expansion, key, field="read_policy.expansion")
                for key in _EXPAND_FLOAT_FIELDS
            },
        },
        # Per-mode context-pack quotas, one integer per supported bucket.
        "quotas_by_mode": {
            mode: {
                bucket: _require_int(
                    _require_mapping(quotas.get(mode), field=f"read_policy.quotas.{mode}"),
                    bucket,
                    field=f"read_policy.quotas.{mode}",
                )
                for bucket in _SUPPORTED_BUCKETS
            }
            for mode in _SUPPORTED_MODES
        },
        # Fusion/seed-retrieval weights used by the read pipeline.
        "retrieval": {
            "semantic_weight": _require_float(weights, "semantic", field="read_policy.weights"),
            "keyword_weight": _require_float(weights, "keyword", field="read_policy.weights"),
            "k_rrf": _require_float(fusion, "k_rrf", field="read_policy.fusion"),
        },
    }
    # Deep copy so callers can mutate the result without touching shared state.
    return deepcopy(settings)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def get_read_hydration_defaults() -> dict[str, Any]:
    """Return the read defaults expected by CLI hydration.

    Nested structures are deep-copied so hydration can mutate them freely.
    """
    settings = get_read_settings()
    defaults: dict[str, Any] = {
        "default_mode": settings["default_mode"],
        "include_global": settings["include_global"],
    }
    defaults["limits_by_mode"] = deepcopy(settings["limits_by_mode"])
    defaults["expand"] = deepcopy(settings["expand"])
    return defaults
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def get_retrieval_defaults() -> dict[str, float]:
    """Return normalized retrieval defaults for fusion and seed retrieval."""
    settings = get_read_settings()
    return dict(settings["retrieval"])
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def resolve_read_limit(*, mode: str, explicit_limit: int | None) -> int:
    """Resolve the effective read limit from explicit payload or mode-based config.

    An explicit limit always wins; otherwise the configured per-mode limit is
    returned after validating the mode.
    """
    if explicit_limit is not None:
        return int(explicit_limit)
    limits = get_read_settings()["limits_by_mode"]
    selected = _require_mode(mode, field="read.mode")
    return int(limits[selected])
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def resolve_read_quotas(*, mode: str) -> dict[str, int]:
    """Resolve the configured context-pack quotas for one read mode."""
    settings = get_read_settings()
    selected = _require_mode(mode, field="read.mode")
    per_bucket = settings["quotas_by_mode"][selected]
    return {name: int(count) for name, count in per_bucket.items()}
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def resolve_read_payload_defaults(payload: dict[str, Any]) -> dict[str, Any]:
    """Resolve effective read payload defaults from YAML-backed settings.

    Missing ``mode``, ``include_global``, and ``limit`` fall back to the
    configured defaults; ``expand`` is merged onto the configured expand
    defaults, with ``None`` values in the payload treated as "not provided".

    Raises:
        ValueError: If ``expand`` is present but not a mapping.
    """

    cfg = get_read_settings()
    effective = dict(payload)

    requested_mode = effective.get("mode")
    effective["mode"] = _require_mode(
        cfg["default_mode"] if requested_mode is None else requested_mode,
        field="read.mode",
    )
    if effective.get("include_global") is None:
        effective["include_global"] = cfg["include_global"]
    if effective.get("limit") is None:
        effective["limit"] = cfg["limits_by_mode"][effective["mode"]]

    # Overlay payload-provided expand flags onto the configured defaults.
    expand_defaults = deepcopy(cfg["expand"])
    supplied_expand = effective.get("expand")
    if supplied_expand is not None:
        if not isinstance(supplied_expand, dict):
            raise ValueError("read.expand must be a mapping")
        expand_defaults.update(
            {key: value for key, value in supplied_expand.items() if value is not None}
        )
    effective["expand"] = expand_defaults
    return effective
|
app/boot/repos.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""This module defines boot-time helpers that expose repository-ready unit-of-work factories."""
|
|
2
|
+
|
|
3
|
+
from app.boot.db import get_session_factory_instance
|
|
4
|
+
from app.boot.embeddings import get_embedding_provider
|
|
5
|
+
from app.periphery.db.uow import PostgresUnitOfWork
|
|
6
|
+
from app.periphery.embeddings.query_vector_search import EmbeddingBackedVectorSearch
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_uow() -> PostgresUnitOfWork:
    """This function creates a fresh unit-of-work instance with bound repositories."""

    def _build_vector_search() -> EmbeddingBackedVectorSearch:
        # Each unit of work gets vector search backed by the embedding provider.
        return EmbeddingBackedVectorSearch(get_embedding_provider())

    session_factory = get_session_factory_instance()
    return PostgresUnitOfWork(session_factory, vector_search_factory=_build_vector_search)
|
app/boot/retrieval.py
ADDED
app/boot/thresholds.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Boot-time helpers for normalized retrieval threshold settings."""
|
|
2
|
+
|
|
3
|
+
from app.boot.config import get_config_provider
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def get_threshold_settings() -> dict[str, float]:
    """Return normalized retrieval thresholds from YAML config.

    Returns:
        Mapping with float ``semantic_threshold`` and ``keyword_threshold``.

    Raises:
        ValueError: If either threshold is missing or non-numeric.
    """

    thresholds = get_config_provider().get_thresholds()
    normalized: dict[str, float] = {}
    # Validate both keys with one loop instead of duplicated per-key checks;
    # iteration order preserves the original raise order (semantic first).
    for key in ("semantic_threshold", "keyword_threshold"):
        value = thresholds.get(key)
        # bool is a subclass of int, so it must be rejected explicitly.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            raise ValueError(f"thresholds.{key} must be numeric")
        normalized[key] = float(value)
    return normalized
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Boot-time helpers for normalized update-policy settings."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from app.boot.config import get_config_provider
|
|
6
|
+
from app.core.contracts.errors import ErrorCode, ErrorDetail
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Gate names accepted in update_policy.gates; "schema" is additionally mandatory.
_SUPPORTED_GATES = ("schema", "semantic", "integrity")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def get_update_policy_settings() -> dict[str, Any]:
    """Return normalized update-policy settings from YAML config.

    Raises:
        ValueError: If ``gates`` is missing/empty, contains an unsupported
            gate name, or omits the mandatory ``schema`` gate.
    """

    policy = get_config_provider().get_update_policy()
    configured = policy.get("gates")
    if not (isinstance(configured, list) and configured):
        raise ValueError("update_policy.gates must be a non-empty list")
    # Normalize entries to strings before validating against the allow-list.
    normalized = [str(entry) for entry in configured]
    if any(name not in _SUPPORTED_GATES for name in normalized):
        raise ValueError("update_policy.gates contains unsupported values")
    if "schema" not in normalized:
        raise ValueError("update_policy.gates must include schema")
    return {"gates": normalized}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def validate_update_policy_settings() -> list[ErrorDetail]:
    """Return structured config errors for unsupported update-policy settings.

    Returns an empty list when the configured update policy is valid.
    """

    try:
        get_update_policy_settings()
    except ValueError as exc:
        return [
            ErrorDetail(
                code=ErrorCode.INTERNAL_ERROR,
                message=str(exc),
                field="update_policy.gates",
            )
        ]
    return []
|