shellbrain 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app/__init__.py +1 -0
- app/__main__.py +7 -0
- app/boot/__init__.py +1 -0
- app/boot/admin_db.py +88 -0
- app/boot/config.py +14 -0
- app/boot/create_policy.py +52 -0
- app/boot/db.py +70 -0
- app/boot/embeddings.py +55 -0
- app/boot/home.py +45 -0
- app/boot/migrations.py +61 -0
- app/boot/read_policy.py +179 -0
- app/boot/repos.py +15 -0
- app/boot/retrieval.py +3 -0
- app/boot/thresholds.py +19 -0
- app/boot/update_policy.py +34 -0
- app/boot/use_cases.py +22 -0
- app/config/__init__.py +1 -0
- app/config/defaults/create_policy.yaml +7 -0
- app/config/defaults/read_policy.yaml +25 -0
- app/config/defaults/runtime.yaml +10 -0
- app/config/defaults/thresholds.yaml +3 -0
- app/config/defaults/update_policy.yaml +5 -0
- app/config/loader.py +58 -0
- app/core/__init__.py +1 -0
- app/core/contracts/__init__.py +1 -0
- app/core/contracts/errors.py +29 -0
- app/core/contracts/requests.py +211 -0
- app/core/contracts/responses.py +15 -0
- app/core/entities/__init__.py +1 -0
- app/core/entities/associations.py +58 -0
- app/core/entities/episodes.py +66 -0
- app/core/entities/evidence.py +29 -0
- app/core/entities/facts.py +30 -0
- app/core/entities/guidance.py +47 -0
- app/core/entities/identity.py +48 -0
- app/core/entities/memory.py +34 -0
- app/core/entities/runtime_context.py +19 -0
- app/core/entities/session_state.py +31 -0
- app/core/entities/telemetry.py +152 -0
- app/core/entities/utility.py +14 -0
- app/core/interfaces/__init__.py +1 -0
- app/core/interfaces/clock.py +12 -0
- app/core/interfaces/config.py +28 -0
- app/core/interfaces/embeddings.py +12 -0
- app/core/interfaces/idgen.py +11 -0
- app/core/interfaces/repos.py +279 -0
- app/core/interfaces/retrieval.py +20 -0
- app/core/interfaces/session_state_store.py +33 -0
- app/core/interfaces/unit_of_work.py +50 -0
- app/core/policies/__init__.py +1 -0
- app/core/policies/_shared/__init__.py +1 -0
- app/core/policies/_shared/executor.py +132 -0
- app/core/policies/_shared/side_effects.py +9 -0
- app/core/policies/create_policy/__init__.py +1 -0
- app/core/policies/create_policy/pipeline.py +96 -0
- app/core/policies/read_policy/__init__.py +1 -0
- app/core/policies/read_policy/bm25.py +114 -0
- app/core/policies/read_policy/context_pack_builder.py +140 -0
- app/core/policies/read_policy/expansion.py +132 -0
- app/core/policies/read_policy/fusion_rrf.py +34 -0
- app/core/policies/read_policy/lexical_query.py +101 -0
- app/core/policies/read_policy/pipeline.py +93 -0
- app/core/policies/read_policy/scenario_lift.py +11 -0
- app/core/policies/read_policy/scoring.py +61 -0
- app/core/policies/read_policy/seed_retrieval.py +54 -0
- app/core/policies/read_policy/utility_prior.py +11 -0
- app/core/policies/update_policy/__init__.py +1 -0
- app/core/policies/update_policy/pipeline.py +80 -0
- app/core/use_cases/__init__.py +1 -0
- app/core/use_cases/build_guidance.py +85 -0
- app/core/use_cases/create_memory.py +26 -0
- app/core/use_cases/manage_session_state.py +159 -0
- app/core/use_cases/read_memory.py +21 -0
- app/core/use_cases/record_episode_sync_telemetry.py +19 -0
- app/core/use_cases/record_operation_telemetry.py +32 -0
- app/core/use_cases/sync_episode.py +162 -0
- app/core/use_cases/update_memory.py +40 -0
- app/migrations/__init__.py +1 -0
- app/migrations/env.py +65 -0
- app/migrations/versions/20260226_0001_initial_schema.py +232 -0
- app/migrations/versions/20260312_0002_add_hard_invariants.py +60 -0
- app/migrations/versions/20260312_0003_drop_create_confidence.py +40 -0
- app/migrations/versions/20260313_0004_episode_sync_hardening.py +71 -0
- app/migrations/versions/20260313_0005_evidence_episode_event_refs.py +45 -0
- app/migrations/versions/20260318_0006_usage_telemetry_schema.py +175 -0
- app/migrations/versions/20260319_0007_identity_session_guidance.py +49 -0
- app/migrations/versions/20260320_0008_instance_metadata_and_backup_safety.py +31 -0
- app/migrations/versions/__init__.py +1 -0
- app/periphery/__init__.py +1 -0
- app/periphery/admin/__init__.py +1 -0
- app/periphery/admin/backup.py +360 -0
- app/periphery/admin/destructive_guard.py +32 -0
- app/periphery/admin/doctor.py +192 -0
- app/periphery/admin/init.py +996 -0
- app/periphery/admin/instance_guard.py +211 -0
- app/periphery/admin/machine_state.py +354 -0
- app/periphery/admin/privileges.py +42 -0
- app/periphery/admin/repo_state.py +266 -0
- app/periphery/admin/restore.py +30 -0
- app/periphery/cli/__init__.py +1 -0
- app/periphery/cli/handlers.py +830 -0
- app/periphery/cli/hydration.py +119 -0
- app/periphery/cli/main.py +710 -0
- app/periphery/cli/presenter_json.py +10 -0
- app/periphery/cli/schema_validation.py +201 -0
- app/periphery/db/__init__.py +1 -0
- app/periphery/db/engine.py +10 -0
- app/periphery/db/models/__init__.py +1 -0
- app/periphery/db/models/associations.py +55 -0
- app/periphery/db/models/episodes.py +55 -0
- app/periphery/db/models/evidence.py +19 -0
- app/periphery/db/models/experiences.py +33 -0
- app/periphery/db/models/instance_metadata.py +17 -0
- app/periphery/db/models/memories.py +39 -0
- app/periphery/db/models/metadata.py +6 -0
- app/periphery/db/models/registry.py +18 -0
- app/periphery/db/models/telemetry.py +174 -0
- app/periphery/db/models/utility.py +19 -0
- app/periphery/db/models/views.py +154 -0
- app/periphery/db/repos/__init__.py +1 -0
- app/periphery/db/repos/relational/__init__.py +1 -0
- app/periphery/db/repos/relational/associations_repo.py +117 -0
- app/periphery/db/repos/relational/episodes_repo.py +188 -0
- app/periphery/db/repos/relational/evidence_repo.py +82 -0
- app/periphery/db/repos/relational/experiences_repo.py +41 -0
- app/periphery/db/repos/relational/memories_repo.py +99 -0
- app/periphery/db/repos/relational/read_policy_repo.py +202 -0
- app/periphery/db/repos/relational/telemetry_repo.py +161 -0
- app/periphery/db/repos/relational/utility_repo.py +30 -0
- app/periphery/db/repos/semantic/__init__.py +1 -0
- app/periphery/db/repos/semantic/keyword_retrieval_repo.py +63 -0
- app/periphery/db/repos/semantic/semantic_retrieval_repo.py +111 -0
- app/periphery/db/session.py +10 -0
- app/periphery/db/uow.py +75 -0
- app/periphery/embeddings/__init__.py +1 -0
- app/periphery/embeddings/local_provider.py +35 -0
- app/periphery/embeddings/query_vector_search.py +18 -0
- app/periphery/episodes/__init__.py +1 -0
- app/periphery/episodes/claude_code.py +387 -0
- app/periphery/episodes/codex.py +423 -0
- app/periphery/episodes/launcher.py +66 -0
- app/periphery/episodes/normalization.py +31 -0
- app/periphery/episodes/poller.py +299 -0
- app/periphery/episodes/source_discovery.py +66 -0
- app/periphery/episodes/tool_filter.py +165 -0
- app/periphery/identity/__init__.py +1 -0
- app/periphery/identity/claude_hook_install.py +67 -0
- app/periphery/identity/claude_runtime.py +83 -0
- app/periphery/identity/codex_runtime.py +32 -0
- app/periphery/identity/compatibility.py +38 -0
- app/periphery/identity/resolver.py +163 -0
- app/periphery/session_state/__init__.py +1 -0
- app/periphery/session_state/file_store.py +100 -0
- app/periphery/telemetry/__init__.py +33 -0
- app/periphery/telemetry/operation_summary.py +299 -0
- app/periphery/telemetry/session_selection.py +156 -0
- app/periphery/telemetry/sync_summary.py +65 -0
- app/periphery/validation/__init__.py +1 -0
- app/periphery/validation/integrity_validation.py +253 -0
- app/periphery/validation/semantic_validation.py +94 -0
- shellbrain-0.1.0.dist-info/METADATA +130 -0
- shellbrain-0.1.0.dist-info/RECORD +165 -0
- shellbrain-0.1.0.dist-info/WHEEL +5 -0
- shellbrain-0.1.0.dist-info/entry_points.txt +2 -0
- shellbrain-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,996 @@
|
|
|
1
|
+
"""Managed Shellbrain bootstrap and repair flow."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from contextlib import contextmanager
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from datetime import datetime, timedelta, timezone
|
|
8
|
+
import importlib.metadata
|
|
9
|
+
import json
|
|
10
|
+
import os
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
import secrets
|
|
13
|
+
import shutil
|
|
14
|
+
import socket
|
|
15
|
+
import subprocess
|
|
16
|
+
import sys
|
|
17
|
+
import time
|
|
18
|
+
from typing import Iterator
|
|
19
|
+
|
|
20
|
+
import psycopg
|
|
21
|
+
from psycopg import sql
|
|
22
|
+
|
|
23
|
+
from app.boot.config import get_config_provider
|
|
24
|
+
from app.boot.home import (
|
|
25
|
+
get_machine_backups_dir,
|
|
26
|
+
get_machine_lock_path,
|
|
27
|
+
get_machine_models_dir,
|
|
28
|
+
get_machine_postgres_data_dir,
|
|
29
|
+
get_shellbrain_home,
|
|
30
|
+
)
|
|
31
|
+
from app.periphery.admin.destructive_guard import backup_and_verify_before_destructive_action
|
|
32
|
+
from app.periphery.admin.instance_guard import dsn_fingerprint, ensure_instance_metadata
|
|
33
|
+
from app.periphery.admin.machine_state import (
|
|
34
|
+
BOOTSTRAP_STATE_PROVISIONING,
|
|
35
|
+
BOOTSTRAP_STATE_READY,
|
|
36
|
+
BOOTSTRAP_STATE_REPAIR_NEEDED,
|
|
37
|
+
BOOTSTRAP_VERSION,
|
|
38
|
+
CONFIG_VERSION,
|
|
39
|
+
BackupState,
|
|
40
|
+
DatabaseState,
|
|
41
|
+
EmbeddingRuntimeState,
|
|
42
|
+
MachineConfig,
|
|
43
|
+
ManagedInstanceState,
|
|
44
|
+
backup_corrupt_machine_config,
|
|
45
|
+
load_machine_config,
|
|
46
|
+
save_machine_config,
|
|
47
|
+
save_recovery_stub,
|
|
48
|
+
try_load_machine_config,
|
|
49
|
+
update_bootstrap_state,
|
|
50
|
+
)
|
|
51
|
+
from app.periphery.admin.privileges import reconcile_app_role_privileges
|
|
52
|
+
from app.periphery.admin.repo_state import (
|
|
53
|
+
IDENTITY_STRENGTH_WEAK_LOCAL,
|
|
54
|
+
RepoRegistration,
|
|
55
|
+
load_repo_registration,
|
|
56
|
+
register_repo,
|
|
57
|
+
)
|
|
58
|
+
from app.periphery.identity.claude_hook_install import install_claude_hook
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# Outcome identifiers carried by ``InitResult.outcome``.
INIT_OUTCOME_INITIALIZED = "initialized"
INIT_OUTCOME_NOOP = "noop"
INIT_OUTCOME_REPAIRED = "repaired"
INIT_OUTCOME_BLOCKED_CONFLICT = "blocked_conflict"
INIT_OUTCOME_BLOCKED_LOCK = "blocked_lock"
INIT_OUTCOME_BLOCKED_DEPENDENCY = "blocked_dependency"
INIT_OUTCOME_BLOCKED_CONFIG_CORRUPT = "blocked_config_corrupt"

# Exit code per outcome: the three successful outcomes share 0, while every
# blocked outcome gets its own distinct non-zero code.
INIT_EXIT_CODES = {
    INIT_OUTCOME_INITIALIZED: 0,
    INIT_OUTCOME_NOOP: 0,
    INIT_OUTCOME_REPAIRED: 0,
    INIT_OUTCOME_BLOCKED_CONFLICT: 10,
    INIT_OUTCOME_BLOCKED_LOCK: 11,
    INIT_OUTCOME_BLOCKED_DEPENDENCY: 12,
    INIT_OUTCOME_BLOCKED_CONFIG_CORRUPT: 13,
}

# Fixed settings used when building a fresh managed Postgres config.
_MANAGED_IMAGE = "pgvector/pgvector:pg16"
_MANAGED_DB_NAME = "shellbrain"
_MANAGED_ADMIN_USER = "shellbrain_admin"
_MANAGED_APP_USER = "shellbrain_app"
_MANAGED_HOST = "127.0.0.1"
# Docker labels written at container creation and checked before an existing
# container is reused.
_MANAGED_LABEL = "io.shellbrain.managed"
_MANAGED_HOME_LABEL = "io.shellbrain.home_sha"
_MANAGED_INSTANCE_LABEL = "io.shellbrain.instance_id"
# NOTE(review): presumably the host port range scanned by _select_managed_port
# (not visible in this chunk) -- confirm.
_MANAGED_PORT_START = 55432
_MANAGED_PORT_END = 55499
# How long _acquire_init_lock keeps retrying before reporting a held lock.
_LOCK_TIMEOUT_SECONDS = 30
# Minimum age of a lock file before _clear_stale_lock considers removing it.
_STALE_LOCK_MINUTES = 15
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class InitDependencyError(RuntimeError):
    """Raised when one bootstrap dependency is missing.

    ``run_init`` converts this into the ``blocked_dependency`` outcome.
    """
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class InitConflictError(RuntimeError):
    """Raised when managed resources cannot be adopted safely.

    ``run_init`` marks the machine as needing repair and converts this into
    the ``blocked_conflict`` outcome.
    """
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class InitLockError(RuntimeError):
    """Raised when the machine init lock cannot be acquired safely.

    ``run_init`` converts this into the ``blocked_lock`` outcome.
    """
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@dataclass(frozen=True)
class InitResult:
    """Immutable summary of one init run: an outcome id plus printable lines."""

    # One of the INIT_OUTCOME_* identifiers.
    outcome: str
    # User-facing messages describing what happened.
    lines: list[str]

    @property
    def exit_code(self) -> int:
        """Look up the stable process exit code registered for ``outcome``."""

        code_by_outcome = INIT_EXIT_CODES
        return code_by_outcome[self.outcome]
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def run_init(
    *,
    repo_root: Path,
    repo_id_override: str | None,
    host_mode: str,
    skip_model_download: bool,
) -> InitResult:
    """Bootstrap or repair the managed Shellbrain environment.

    Runs under the machine init lock and walks through these steps, persisting
    the current step to the machine config before each one: config load or
    recovery, managed container, database reconcile, embeddings, repo
    registration, verification.

    Args:
        repo_root: Repository being registered.
        repo_id_override: Forwarded to ``_register_repo``.
        host_mode: Forwarded to ``_handle_claude_integration``.
        skip_model_download: Forwarded to ``_prewarm_embeddings``.

    Returns:
        An ``InitResult`` whose outcome is a success value or one of the
        ``blocked_*`` values when a dependency, conflict, lock, or
        unrecoverable corrupt config stops the run.

    Raises:
        Exception: Any unexpected error is re-raised after the machine state
            has been marked as needing repair.
    """

    home_root = get_shellbrain_home()
    home_root.mkdir(parents=True, exist_ok=True)
    notes: list[str] = []
    mutated_machine = False
    mutated_repo = False
    config_corruption_recovered = False
    repair_performed = False
    existing_registration = load_repo_registration(repo_root)

    try:
        with _acquire_init_lock():
            _ensure_dependencies()
            machine_config, machine_error = try_load_machine_config()
            if machine_error:
                # Keep the unreadable config for inspection, then try to
                # rebuild it from what Docker still knows.
                backup_path = backup_corrupt_machine_config()
                if backup_path is not None:
                    notes.append(f"Preserved corrupt machine config at {backup_path}")
                recovered = _recover_machine_config_from_docker()
                if recovered is None:
                    save_recovery_stub(
                        current_step="config_recovery",
                        last_error=machine_error,
                    )
                    return InitResult(
                        outcome=INIT_OUTCOME_BLOCKED_CONFIG_CORRUPT,
                        lines=[
                            "Unable to recover a managed Shellbrain instance from the corrupt machine config.",
                            "Rerun after resolving Docker/resource conflicts or remove the corrupt config manually if this is a fresh install.",
                            *notes,
                        ],
                    )
                machine_config = update_bootstrap_state(
                    recovered,
                    bootstrap_state=BOOTSTRAP_STATE_REPAIR_NEEDED,
                    current_step="config_recovery",
                    last_error=machine_error,
                )
                save_machine_config(machine_config)
                config_corruption_recovered = True
                mutated_machine = True

            if machine_config is None:
                machine_config = _build_fresh_machine_config()
                save_machine_config(machine_config)
                mutated_machine = True

            machine_config = _migrate_machine_config(machine_config)
            # Decide on repair before the state is overwritten with
            # "provisioning" just below.
            should_repair = (
                machine_config.bootstrap_state == BOOTSTRAP_STATE_REPAIR_NEEDED or config_corruption_recovered
            )
            machine_config = update_bootstrap_state(
                machine_config,
                bootstrap_state=BOOTSTRAP_STATE_PROVISIONING,
                current_step="managed_instance",
                last_error=None,
            )
            save_machine_config(machine_config)

            container_changed = _ensure_managed_container(machine_config)
            mutated_machine = mutated_machine or container_changed

            # Wait for Postgres before the pre-repair backup: the container
            # may have been started moments ago, and a backup attempted
            # against a server that is still booting would fail spuriously.
            _wait_for_postgres(machine_config.database.admin_dsn)

            if should_repair:
                _backup_before_repair(machine_config)
                notes.append("Created a backup before repairing the managed instance.")
                repair_performed = True

            machine_config = update_bootstrap_state(
                machine_config,
                bootstrap_state=BOOTSTRAP_STATE_PROVISIONING,
                current_step="database_reconcile",
                last_error=None,
            )
            save_machine_config(machine_config)
            db_changed = _reconcile_database(machine_config)
            mutated_machine = mutated_machine or db_changed

            machine_config = update_bootstrap_state(
                machine_config,
                bootstrap_state=BOOTSTRAP_STATE_PROVISIONING,
                current_step="embeddings",
                last_error=None,
            )
            save_machine_config(machine_config)
            embedding_changed, machine_config = _prewarm_embeddings(
                machine_config,
                skip_model_download=skip_model_download,
            )
            save_machine_config(machine_config)
            mutated_machine = mutated_machine or embedding_changed

            machine_config = update_bootstrap_state(
                machine_config,
                bootstrap_state=BOOTSTRAP_STATE_PROVISIONING,
                current_step="repo_registration",
                last_error=None,
            )
            save_machine_config(machine_config)
            registration, repo_changed = _register_repo(
                repo_root=repo_root,
                repo_id_override=repo_id_override,
                machine_instance_id=machine_config.machine_instance_id,
            )
            mutated_repo = mutated_repo or repo_changed

            claude_note = _handle_claude_integration(
                repo_root=repo_root,
                registration=registration,
                host_mode=host_mode,
            )
            if claude_note:
                notes.append(claude_note)
                # Re-register so the stored registration carries the Claude
                # integration status and note.
                registration = register_repo(
                    repo_root=repo_root,
                    machine_instance_id=machine_config.machine_instance_id,
                    explicit_repo_id=registration.repo_id if registration.identity_strength == "explicit" else None,
                    claude_status=_claude_status_for_note(claude_note),
                    claude_settings_path=str(repo_root / ".claude" / "settings.local.json") if "Installed Claude hook" in claude_note else registration.claude_settings_path,
                    claude_note=claude_note,
                )
                mutated_repo = True

            machine_config = update_bootstrap_state(
                machine_config,
                bootstrap_state=BOOTSTRAP_STATE_READY,
                current_step="verification",
                last_error=None,
            )
            save_machine_config(machine_config)

            outcome = _determine_outcome(
                mutated_machine=mutated_machine,
                mutated_repo=mutated_repo,
                existing_registration=existing_registration,
                repair_performed=repair_performed,
                config_corruption_recovered=config_corruption_recovered,
            )
            lines = _render_success_lines(
                outcome=outcome,
                config=machine_config,
                registration=registration,
                notes=notes,
            )
            return InitResult(outcome=outcome, lines=lines)
    except InitDependencyError as exc:
        return InitResult(outcome=INIT_OUTCOME_BLOCKED_DEPENDENCY, lines=[str(exc)])
    except InitConflictError as exc:
        _mark_repair_needed(str(exc))
        return InitResult(outcome=INIT_OUTCOME_BLOCKED_CONFLICT, lines=[str(exc)])
    except InitLockError as exc:
        return InitResult(outcome=INIT_OUTCOME_BLOCKED_LOCK, lines=[str(exc)])
    except Exception as exc:  # pragma: no cover - fail closed in init path
        _mark_repair_needed(str(exc))
        raise
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
@contextmanager
def _acquire_init_lock() -> Iterator[None]:
    """Acquire a machine-scoped init lock with stale lock recovery.

    The lock is a file created with ``O_CREAT | O_EXCL`` that holds JSON
    metadata about the owner (pid, hostname, command, start time). While the
    file exists, acquisition retries once per second, clearing a stale lock
    when possible, for up to ``_LOCK_TIMEOUT_SECONDS``.

    Yields:
        None, while the lock is held. The lock file is removed on exit.

    Raises:
        InitLockError: If the lock is still held when the timeout elapses.
    """

    lock_path = get_machine_lock_path()
    lock_path.parent.mkdir(parents=True, exist_ok=True)
    # Monotonic clock: the deadline must not move with wall-clock adjustments.
    deadline = time.monotonic() + _LOCK_TIMEOUT_SECONDS
    while True:
        # Only the exclusive create is guarded, so a FileExistsError raised
        # later by the caller's with-body is never mistaken for contention.
        try:
            fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        except FileExistsError:
            if _clear_stale_lock(lock_path):
                continue
            if time.monotonic() >= deadline:
                holder = _read_lock_holder(lock_path)
                raise InitLockError(
                    f"Shellbrain init is already running for this machine state. Lock holder: {holder or 'unknown'}"
                )
            time.sleep(1)
            continue
        break

    # From here on this process owns the lock file; always remove it, even
    # when writing the metadata or the caller's body fails.
    try:
        payload = {
            "pid": os.getpid(),
            "hostname": socket.gethostname(),
            "command": " ".join(sys.argv),
            "started_at": datetime.now(timezone.utc).isoformat(),
        }
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            json.dump(payload, handle, indent=2, sort_keys=True)
        yield
    finally:
        try:
            lock_path.unlink()
        except FileNotFoundError:
            pass
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def _clear_stale_lock(lock_path: Path) -> bool:
    """Remove one stale init lock when the owning process is gone.

    A lock is removed only when its metadata is readable, it is at least
    ``_STALE_LOCK_MINUTES`` old, and its recorded pid no longer exists.
    Anything that cannot be verified is treated as not stale.

    Args:
        lock_path: Location of the lock file.

    Returns:
        True when the lock was removed (or had already disappeared after
        passing the staleness checks), otherwise False.
    """

    holder = _read_lock_payload(lock_path)
    # Missing, unreadable, or non-object payloads cannot prove staleness.
    if not isinstance(holder, dict):
        return False
    started_at = holder.get("started_at")
    pid = holder.get("pid")
    if not isinstance(started_at, str) or not isinstance(pid, int):
        return False
    try:
        age = datetime.now(timezone.utc) - datetime.fromisoformat(started_at.replace("Z", "+00:00"))
    except (ValueError, TypeError):
        # ValueError: not an ISO timestamp. TypeError: timestamp without a UTC
        # offset, which cannot be compared with the aware "now".
        return False
    if age < timedelta(minutes=_STALE_LOCK_MINUTES):
        return False
    # NOTE(review): the pid is checked on this host only; the recorded
    # hostname is not compared -- confirm that is intended.
    if _pid_exists(pid):
        return False
    lock_path.unlink(missing_ok=True)
    return True
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def _pid_exists(pid: int) -> bool:
|
|
345
|
+
"""Return whether one process id still exists."""
|
|
346
|
+
|
|
347
|
+
if pid <= 0:
|
|
348
|
+
return False
|
|
349
|
+
try:
|
|
350
|
+
os.kill(pid, 0)
|
|
351
|
+
except ProcessLookupError:
|
|
352
|
+
return False
|
|
353
|
+
except PermissionError:
|
|
354
|
+
return True
|
|
355
|
+
return True
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def _read_lock_payload(lock_path: Path) -> dict[str, object] | None:
|
|
359
|
+
"""Return parsed lock metadata when available."""
|
|
360
|
+
|
|
361
|
+
try:
|
|
362
|
+
return json.loads(lock_path.read_text(encoding="utf-8"))
|
|
363
|
+
except (FileNotFoundError, json.JSONDecodeError):
|
|
364
|
+
return None
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def _read_lock_holder(lock_path: Path) -> str | None:
    """Summarize the lock owner as ``pid=... host=... command=...``.

    Returns None when the lock metadata cannot be read.
    """

    metadata = _read_lock_payload(lock_path)
    if metadata is None:
        return None
    owner_pid, owner_host, owner_command = (
        metadata.get(field) for field in ("pid", "hostname", "command")
    )
    return f"pid={owner_pid} host={owner_host} command={owner_command}"
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def _ensure_dependencies() -> None:
    """Check Python and Docker prerequisites before anything is mutated.

    Raises:
        InitDependencyError: If Python is older than 3.11, the ``docker``
            executable is not on PATH, or ``docker info`` exits non-zero.
    """

    if not sys.version_info >= (3, 11):
        raise InitDependencyError("Shellbrain init requires Python 3.11 or newer.")

    docker_executable = shutil.which("docker")
    if docker_executable is None:
        raise InitDependencyError("Shellbrain init requires Docker to be installed.")

    # ``docker info`` only succeeds when the daemon answers.
    probe = subprocess.run(["docker", "info"], capture_output=True, text=True, check=False)
    if probe.returncode:
        raise InitDependencyError("Shellbrain init requires the Docker daemon to be running and reachable.")
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
def _build_fresh_machine_config() -> MachineConfig:
    """Assemble the initial managed-local machine config with new credentials.

    Raises:
        RuntimeError: If the runtime config has no ``embeddings`` mapping.
    """

    embedding_settings = get_config_provider().get_runtime().get("embeddings")
    if not isinstance(embedding_settings, dict):
        raise RuntimeError("runtime.embeddings must be configured")

    home_digest = _home_hash()
    managed_port = _select_managed_port()
    # Hex tokens need no escaping when embedded in the DSN URLs below.
    admin_secret = secrets.token_hex(16)
    app_secret = secrets.token_hex(16)

    endpoint = f"{_MANAGED_HOST}:{managed_port}/{_MANAGED_DB_NAME}"
    admin_dsn = f"postgresql+psycopg://{_MANAGED_ADMIN_USER}:{admin_secret}@{endpoint}"
    app_dsn = f"postgresql+psycopg://{_MANAGED_APP_USER}:{app_secret}@{endpoint}"
    managed_instance_id = dsn_fingerprint(admin_dsn)

    database_state = DatabaseState(app_dsn=app_dsn, admin_dsn=admin_dsn)
    managed_state = ManagedInstanceState(
        instance_id=managed_instance_id,
        container_name=f"shellbrain-postgres-{home_digest[:8]}",
        image=_MANAGED_IMAGE,
        host=_MANAGED_HOST,
        port=managed_port,
        db_name=_MANAGED_DB_NAME,
        data_dir=str(get_machine_postgres_data_dir()),
        admin_user=_MANAGED_ADMIN_USER,
        admin_password=admin_secret,
        app_user=_MANAGED_APP_USER,
        app_password=app_secret,
    )
    backup_state = BackupState(root=str(get_machine_backups_dir()), mirror_root=None)
    embedding_state = EmbeddingRuntimeState(
        provider=str(embedding_settings.get("provider") or "sentence_transformers"),
        model=str(embedding_settings.get("model") or "all-MiniLM-L6-v2"),
        model_revision=None,
        backend_version=None,
        cache_path=str(get_machine_models_dir()),
        readiness_state="pending",
        last_error=None,
    )

    return MachineConfig(
        config_version=CONFIG_VERSION,
        bootstrap_version=BOOTSTRAP_VERSION,
        runtime_mode="managed_local",
        bootstrap_state=BOOTSTRAP_STATE_PROVISIONING,
        current_step="bootstrap",
        last_error=None,
        database=database_state,
        managed=managed_state,
        backups=backup_state,
        embeddings=embedding_state,
    )
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
def _migrate_machine_config(config: MachineConfig) -> MachineConfig:
    """Bring a machine config up to the current schema versions.

    Returns the same object when both versions are already current, otherwise
    a copy stamped with the current versions and all other fields carried over.

    Raises:
        InitConflictError: If either stored version is newer than this build.
    """

    stored_versions = (config.config_version, config.bootstrap_version)
    stored_config_version, stored_bootstrap_version = stored_versions

    if stored_config_version > CONFIG_VERSION or stored_bootstrap_version > BOOTSTRAP_VERSION:
        raise InitConflictError("Machine config version is newer than this Shellbrain build can manage.")

    if stored_versions == (CONFIG_VERSION, BOOTSTRAP_VERSION):
        return config

    carried_over = dict(
        runtime_mode=config.runtime_mode,
        bootstrap_state=config.bootstrap_state,
        current_step=config.current_step,
        last_error=config.last_error,
        database=config.database,
        managed=config.managed,
        backups=config.backups,
        embeddings=config.embeddings,
    )
    return MachineConfig(
        config_version=CONFIG_VERSION,
        bootstrap_version=BOOTSTRAP_VERSION,
        **carried_over,
    )
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def _ensure_managed_container(config: MachineConfig) -> bool:
    """Create or start the managed Postgres container.

    An existing container is reused only when its labels show it is managed by
    Shellbrain, belongs to this Shellbrain home, and carries the configured
    instance id.

    Args:
        config: Machine config naming the container and its instance id.

    Returns:
        True when a container was created or started, False when it was
        already running.

    Raises:
        InitConflictError: If a container with the configured name exists but
            its labels do not match, or creating/starting it fails.
    """

    info = _inspect_container(config.managed.container_name)
    if info is None:
        _create_managed_container(config)
        _start_container(config.managed.container_name)
        return True
    # Null-safe at both levels, like the State lookup below: a missing or
    # null Config/Labels entry means "no labels" and is rejected as unowned.
    labels = (info.get("Config") or {}).get("Labels") or {}
    if labels.get(_MANAGED_LABEL) != "true" or labels.get(_MANAGED_HOME_LABEL) != _home_hash():
        raise InitConflictError(
            f"Container {config.managed.container_name} already exists but is not owned by Shellbrain for this machine state."
        )
    if labels.get(_MANAGED_INSTANCE_LABEL) != config.machine_instance_id:
        raise InitConflictError(
            f"Managed container {config.managed.container_name} does not match the configured Shellbrain instance id."
        )
    state = info.get("State", {}) or {}
    if not state.get("Running"):
        _start_container(config.managed.container_name)
        return True
    return False
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
def _create_managed_container(config: MachineConfig) -> None:
    """Create the managed Postgres container with Shellbrain-owned labels.

    The container is created but not started. It is labelled with the managed
    marker, the home hash, and the instance id, gets a ``pg_isready`` health
    check, and mounts the configured data directory as the Postgres data dir.

    Args:
        config: Machine config providing names, credentials, port, and paths.

    Raises:
        InitConflictError: If ``docker create`` exits non-zero.
    """

    data_dir = Path(config.managed.data_dir)
    data_dir.mkdir(parents=True, exist_ok=True)
    command = [
        "docker",
        "create",
        "--name",
        config.managed.container_name,
        "--label",
        f"{_MANAGED_LABEL}=true",
        "--label",
        f"{_MANAGED_HOME_LABEL}={_home_hash()}",
        "--label",
        f"{_MANAGED_INSTANCE_LABEL}={config.machine_instance_id}",
        "--health-cmd",
        f"pg_isready -U {config.managed.admin_user} -d {config.managed.db_name}",
        "--health-interval",
        "10s",
        "--health-timeout",
        "5s",
        "--health-retries",
        "10",
        "-e",
        f"POSTGRES_DB={config.managed.db_name}",
        "-e",
        f"POSTGRES_USER={config.managed.admin_user}",
        "-e",
        f"POSTGRES_PASSWORD={config.managed.admin_password}",
        "-e",
        f"SHELLBRAIN_APP_USER={config.managed.app_user}",
        "-e",
        f"SHELLBRAIN_APP_PASSWORD={config.managed.app_password}",
        "-p",
        # Publish on the loopback address only. Without a host IP, Docker
        # binds the port on every interface and exposes the database to the
        # network, although the managed DSNs only ever use _MANAGED_HOST.
        f"{_MANAGED_HOST}:{config.managed.port}:5432",
        "-v",
        f"{config.managed.data_dir}:/var/lib/postgresql/data",
        config.managed.image,
    ]
    completed = subprocess.run(command, capture_output=True, text=True, check=False)
    if completed.returncode != 0:
        raise InitConflictError(completed.stderr.strip() or f"Failed to create container {config.managed.container_name}.")
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
def _start_container(container_name: str) -> None:
    """Run ``docker start`` for an existing container.

    Raises:
        InitConflictError: If the command exits non-zero; the message is
            Docker's stderr, or a generic fallback when stderr is empty.
    """

    start_argv = ["docker", "start", container_name]
    outcome = subprocess.run(start_argv, capture_output=True, text=True, check=False)
    if outcome.returncode == 0:
        return
    reason = outcome.stderr.strip() or f"Failed to start container {container_name}."
    raise InitConflictError(reason)
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
def _inspect_container(container_name: str) -> dict[str, object] | None:
    """Fetch the ``docker inspect`` record for a container, if there is one.

    Returns None when the command exits non-zero, prints an empty result, or
    its first entry is not a JSON object.
    """

    inspect_argv = ["docker", "inspect", container_name]
    outcome = subprocess.run(inspect_argv, capture_output=True, text=True, check=False)
    if outcome.returncode != 0:
        return None

    entries = json.loads(outcome.stdout)
    if not entries:
        return None

    first_entry = entries[0]
    return first_entry if isinstance(first_entry, dict) else None
|
|
564
|
+
|
|
565
|
+
|
|
566
|
+
def _wait_for_postgres(admin_dsn: str) -> None:
    """Block until a connection to managed Postgres succeeds.

    Retries once per second for up to 45 seconds.

    Raises:
        InitConflictError: If no connection succeeds before the deadline.
    """

    give_up_at = time.time() + 45
    # psycopg does not understand the SQLAlchemy-style "+psycopg" suffix.
    libpq_dsn = admin_dsn.replace("+psycopg", "")
    connected = False
    while not connected:
        try:
            with psycopg.connect(libpq_dsn, connect_timeout=2):
                connected = True
        except psycopg.Error:
            # A failure while closing the probe connection also counts as
            # "not ready yet".
            connected = False
            if time.time() >= give_up_at:
                raise InitConflictError("Managed Postgres did not become ready in time.")
            time.sleep(1)
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
def _backup_before_repair(config: MachineConfig) -> None:
    """Take and verify a backup of the managed instance before it is repaired.

    Delegates to ``backup_and_verify_before_destructive_action`` with the admin
    DSN, backup root, and container credentials taken from ``config``.
    """

    backup_arguments = {
        "admin_dsn": config.database.admin_dsn,
        "backup_root": Path(config.backups.root),
        "container_name": config.managed.container_name,
        "container_db_name": config.managed.db_name,
        "container_admin_user": config.managed.admin_user,
        "container_admin_password": config.managed.admin_password,
    }
    backup_and_verify_before_destructive_action(**backup_arguments)
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
def _reconcile_database(config: MachineConfig) -> bool:
    """Bring the managed database, app role, extension, and grants to the expected state.

    Returns:
        ``True`` when the database or the app role had to be created, else ``False``.
        Resetting the password of an existing role does not count as a change.
    """

    managed = config.managed
    admin_dsn = config.database.admin_dsn
    target_dsn = admin_dsn.replace("+psycopg", "")
    maintenance_dsn = _replace_database(target_dsn, "postgres")

    # Step 1: connect to the ``postgres`` database and create the managed database if absent.
    created_database = False
    with psycopg.connect(maintenance_dsn, autocommit=True) as conn, conn.cursor() as cur:
        cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (managed.db_name,))
        if cur.fetchone() is None:
            cur.execute(sql.SQL("CREATE DATABASE {}").format(sql.Identifier(managed.db_name)))
            created_database = True

    # Step 2: inside the managed database, create the app role or reset its password,
    # then make sure the vector extension and the CONNECT grant are in place.
    created_role = False
    with psycopg.connect(target_dsn, autocommit=True) as conn, conn.cursor() as cur:
        cur.execute("SELECT 1 FROM pg_roles WHERE rolname = %s", (managed.app_user,))
        role_missing = cur.fetchone() is None
        if role_missing:
            role_statement = sql.SQL("CREATE ROLE {} LOGIN NOSUPERUSER NOCREATEDB NOCREATEROLE PASSWORD {}")
        else:
            role_statement = sql.SQL("ALTER ROLE {} WITH PASSWORD {}")
        cur.execute(
            role_statement.format(
                sql.Identifier(managed.app_user),
                sql.Literal(managed.app_password),
            )
        )
        created_role = role_missing
        cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
        grant_statement = sql.SQL("GRANT CONNECT ON DATABASE {} TO {}").format(
            sql.Identifier(managed.db_name),
            sql.Identifier(managed.app_user),
        )
        cur.execute(grant_statement)

    # Step 3: delegate privilege reconciliation and instance metadata to their helpers.
    reconcile_app_role_privileges(admin_dsn=admin_dsn, app_dsn=config.database.app_dsn)
    ensure_instance_metadata(
        admin_dsn,
        instance_mode="live",
        created_by="app.init",
        notes="Managed local Shellbrain instance",
    )
    return created_database or created_role
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
def _config_with_embedding_state(
    config: MachineConfig,
    *,
    backend_version: str | None,
    readiness_state: str,
    embeddings_last_error: str | None,
    failure: str | None = None,
) -> MachineConfig:
    """Copy ``config`` with refreshed embedding state; a ``failure`` message also flags the bootstrap for repair."""

    failed = failure is not None
    return MachineConfig(
        config_version=config.config_version,
        bootstrap_version=config.bootstrap_version,
        runtime_mode=config.runtime_mode,
        bootstrap_state=BOOTSTRAP_STATE_REPAIR_NEEDED if failed else config.bootstrap_state,
        current_step="embeddings" if failed else config.current_step,
        last_error=failure if failed else config.last_error,
        database=config.database,
        managed=config.managed,
        backups=config.backups,
        embeddings=EmbeddingRuntimeState(
            provider=config.embeddings.provider,
            model=config.embeddings.model,
            model_revision=config.embeddings.model_revision,
            backend_version=backend_version,
            cache_path=config.embeddings.cache_path,
            readiness_state=readiness_state,
            last_error=embeddings_last_error,
        ),
    )


def _prewarm_embeddings(config: MachineConfig, *, skip_model_download: bool) -> tuple[bool, MachineConfig]:
    """Prewarm the configured embedding backend and pin its runtime metadata.

    Args:
        config: Current machine configuration; it is not modified.
        skip_model_download: When true, no model is loaded and the embedding
            state is recorded as ``"skipped"``.

    Returns:
        A ``(changed, updated_config)`` pair. ``changed`` is always ``True`` for
        the skipped and failed paths. On success it is ``True`` only when the
        previous readiness state was not ``"ready"`` or the installed
        ``sentence-transformers`` version differs from the recorded one.
    """

    try:
        backend_version = importlib.metadata.version("sentence-transformers")
    except importlib.metadata.PackageNotFoundError:
        backend_version = None

    if skip_model_download:
        skipped = _config_with_embedding_state(
            config,
            backend_version=backend_version,
            readiness_state="skipped",
            embeddings_last_error="Model prewarm was skipped during init.",
        )
        return True, skipped

    # Point the Hugging Face cache at the configured directory before the provider is imported.
    os.environ["HF_HOME"] = config.embeddings.cache_path
    Path(config.embeddings.cache_path).mkdir(parents=True, exist_ok=True)
    from app.periphery.embeddings.local_provider import SentenceTransformersEmbeddingProvider

    provider = SentenceTransformersEmbeddingProvider(
        model=config.embeddings.model,
        cache_folder=config.embeddings.cache_path,
    )
    try:
        provider.embed("shellbrain init warmup")
    except Exception as exc:
        # Deliberately broad: any warmup failure is recorded in the config rather than raised.
        message = str(exc)
        failed = _config_with_embedding_state(
            config,
            backend_version=backend_version,
            readiness_state="failed",
            embeddings_last_error=message,
            failure=message,
        )
        return True, failed

    changed = (
        config.embeddings.readiness_state != "ready"
        or config.embeddings.backend_version != backend_version
    )
    ready = _config_with_embedding_state(
        config,
        backend_version=backend_version,
        readiness_state="ready",
        embeddings_last_error=None,
    )
    return changed, ready
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
def _register_repo(
    *,
    repo_root: Path,
    repo_id_override: str | None,
    machine_instance_id: str,
) -> tuple[RepoRegistration, bool]:
    """Register the repo with the active machine instance and report whether anything changed.

    Claude-related fields of an existing registration are carried over; a first
    registration starts with status ``"not_checked"`` and no settings path or note.

    Returns:
        ``(registration, changed)`` where ``changed`` is true when the new
        registration differs from the previously stored one (or none existed).
    """

    previous = load_repo_registration(repo_root)
    if previous is None:
        claude_fields = {
            "claude_status": "not_checked",
            "claude_settings_path": None,
            "claude_note": None,
        }
    else:
        claude_fields = {
            "claude_status": previous.claude_status,
            "claude_settings_path": previous.claude_settings_path,
            "claude_note": previous.claude_note,
        }
    current = register_repo(
        repo_root=repo_root,
        machine_instance_id=machine_instance_id,
        explicit_repo_id=repo_id_override,
        **claude_fields,
    )
    return current, previous != current
|
|
746
|
+
|
|
747
|
+
|
|
748
|
+
def _handle_claude_integration(*, repo_root: Path, registration: RepoRegistration, host_mode: str) -> str | None:
    """Install the Claude hook when the host mode and repo layout call for it.

    The hook is installed when ``host_mode`` is ``"claude"``, or when it is
    ``"auto"`` and the repo contains a ``.claude`` entry. ``registration`` is
    accepted but not consulted here.

    Returns:
        A note naming the settings path the hook was installed at, or ``None``
        when nothing was installed.
    """

    claude_dir = repo_root / ".claude"
    has_claude_signal = claude_dir.exists() or (claude_dir / "settings.local.json").exists()
    wants_install = host_mode == "claude" or (host_mode == "auto" and has_claude_signal)
    if not wants_install:
        # Covers "none", "auto" without a repo signal, and any unrecognised mode.
        return None
    installed_at = install_claude_hook(repo_root=repo_root)
    return f"Installed Claude hook at {installed_at}"
|
|
758
|
+
|
|
759
|
+
|
|
760
|
+
def _claude_status_for_note(note: str) -> str:
    """Map one init note onto the repo-local Claude status it implies."""

    # Checked in order; the first matching prefix wins.
    prefix_to_status = (
        ("Installed Claude hook", "installed"),
        ("Claude repo detected", "eligible_repo_only"),
    )
    return next(
        (status for prefix, status in prefix_to_status if note.startswith(prefix)),
        "not_applicable",
    )
|
|
768
|
+
|
|
769
|
+
|
|
770
|
+
def _determine_outcome(
    *,
    mutated_machine: bool,
    mutated_repo: bool,
    existing_registration: RepoRegistration | None,
    repair_performed: bool,
    config_corruption_recovered: bool,
) -> str:
    """Classify the finished init run as repaired, initialized, or a no-op.

    A repair or a recovered config takes precedence over every other signal;
    otherwise any machine or repo mutation counts as an initialization.
    """

    repaired = config_corruption_recovered or repair_performed
    if repaired:
        return INIT_OUTCOME_REPAIRED
    # NOTE(review): a first-time registration yields the same outcome as any other
    # mutation, so ``existing_registration`` does not currently change the result.
    first_registration = existing_registration is None and mutated_repo
    if first_registration or mutated_machine or mutated_repo:
        return INIT_OUTCOME_INITIALIZED
    return INIT_OUTCOME_NOOP
|
|
787
|
+
|
|
788
|
+
|
|
789
|
+
def _render_success_lines(
    *,
    outcome: str,
    config: MachineConfig,
    registration: RepoRegistration,
    notes: list[str],
) -> list[str]:
    """Build the init success summary, one entry per output line, without the outcome prefix.

    The weak-local identity warning, when applicable, sits directly after the
    managed-instance line; caller-supplied ``notes`` are appended at the end.
    """

    managed = config.managed
    summary = [f"Managed instance: {managed.container_name} ({managed.host}:{managed.port})"]
    if registration.identity_strength == IDENTITY_STRENGTH_WEAK_LOCAL:
        summary.append(
            "Repo identity is weak-local and will change if this directory moves. Use --repo-id for a durable override."
        )
    summary += [
        f"Repo: {registration.repo_id}",
        f"Embeddings: {config.embeddings.readiness_state}",
        f"Backups: {config.backups.root}",
        f"Next: shellbrain read --json '{{\"query\":\"What prior Shellbrain context matters for this task?\",\"kinds\":[\"problem\",\"solution\",\"failed_tactic\",\"fact\",\"preference\",\"change\"]}}'",
    ]
    summary += notes
    return summary
|
|
809
|
+
|
|
810
|
+
|
|
811
|
+
def _mark_repair_needed(message: str) -> None:
    """Record that init failed unexpectedly so a later run knows a repair is needed.

    When the machine config cannot be loaded a recovery stub is saved instead;
    otherwise the existing config is re-saved in the repair-needed state,
    keeping its current step when one is set.
    """

    config, error = try_load_machine_config()
    loaded = error is None and config is not None
    if not loaded:
        save_recovery_stub(current_step="unexpected_failure", last_error=message)
        return
    flagged = update_bootstrap_state(
        config,
        bootstrap_state=BOOTSTRAP_STATE_REPAIR_NEEDED,
        current_step=config.current_step or "unexpected_failure",
        last_error=message,
    )
    save_machine_config(flagged)
|
|
826
|
+
|
|
827
|
+
|
|
828
|
+
def _recover_machine_config_from_docker() -> MachineConfig | None:
    """Attempt to recover one unique managed instance for the current home root.

    Lists containers carrying the managed label and this home's hash label,
    then rebuilds a machine config from the single match's inspect payload
    (environment variables and the host binding of ``5432/tcp``).

    Returns:
        A config in the repair-needed state at step ``"config_recovery"``, or
        ``None`` when recovery is not possible: the listing fails, the match is
        not unique, the container cannot be inspected, no usable host port is
        published for ``5432/tcp``, or either password is missing from the
        container environment.
    """

    completed = subprocess.run(
        [
            "docker",
            "ps",
            "-a",
            "--filter",
            f"label={_MANAGED_LABEL}=true",
            "--filter",
            f"label={_MANAGED_HOME_LABEL}={_home_hash()}",
            "--format",
            "{{.Names}}",
        ],
        capture_output=True,
        text=True,
        check=False,
    )
    if completed.returncode != 0:
        return None
    names = [line.strip() for line in completed.stdout.splitlines() if line.strip()]
    # Zero or several candidates are both ambiguous; only a unique match is recovered.
    if len(names) != 1:
        return None
    container_name = names[0]
    info = _inspect_container(container_name)
    if info is None:
        return None

    # ``or {}`` also covers an explicit null section, matching the lookups below.
    container_config = info.get("Config") or {}
    env_map: dict[str, str] = {}
    for item in container_config.get("Env") or []:
        if not isinstance(item, str) or "=" not in item:
            continue
        key, value = item.split("=", 1)
        env_map[key] = value

    network_settings = info.get("NetworkSettings") or {}
    ports = network_settings.get("Ports") or {}
    host_entries = ports.get("5432/tcp") or []
    if not host_entries or not isinstance(host_entries[0], dict):
        return None
    try:
        port = int(host_entries[0]["HostPort"])
    except (KeyError, TypeError, ValueError):
        # A missing or non-numeric host port means the binding cannot be recovered.
        return None

    admin_password = env_map.get("POSTGRES_PASSWORD")
    app_password = env_map.get("SHELLBRAIN_APP_PASSWORD")
    if not admin_password or not app_password:
        return None

    # NOTE(review): the DSNs use the module defaults for user and database names, while
    # the managed state below prefers the container's environment values - confirm
    # these cannot diverge.
    admin_dsn = f"postgresql+psycopg://{_MANAGED_ADMIN_USER}:{admin_password}@{_MANAGED_HOST}:{port}/{_MANAGED_DB_NAME}"
    app_dsn = f"postgresql+psycopg://{_MANAGED_APP_USER}:{app_password}@{_MANAGED_HOST}:{port}/{_MANAGED_DB_NAME}"
    return MachineConfig(
        config_version=CONFIG_VERSION,
        bootstrap_version=BOOTSTRAP_VERSION,
        runtime_mode="managed_local",
        bootstrap_state=BOOTSTRAP_STATE_REPAIR_NEEDED,
        current_step="config_recovery",
        last_error=None,
        database=DatabaseState(app_dsn=app_dsn, admin_dsn=admin_dsn),
        managed=ManagedInstanceState(
            instance_id=dsn_fingerprint(admin_dsn),
            container_name=container_name,
            image=str(container_config.get("Image") or _MANAGED_IMAGE),
            host=_MANAGED_HOST,
            port=port,
            db_name=env_map.get("POSTGRES_DB", _MANAGED_DB_NAME),
            data_dir=str(get_machine_postgres_data_dir()),
            admin_user=env_map.get("POSTGRES_USER", _MANAGED_ADMIN_USER),
            admin_password=admin_password,
            app_user=env_map.get("SHELLBRAIN_APP_USER", _MANAGED_APP_USER),
            app_password=app_password,
        ),
        backups=BackupState(root=str(get_machine_backups_dir()), mirror_root=None),
        embeddings=EmbeddingRuntimeState(
            provider="sentence_transformers",
            model=str(get_config_provider().get_runtime()["embeddings"]["model"]),
            model_revision=None,
            backend_version=None,
            cache_path=str(get_machine_models_dir()),
            readiness_state="pending",
            last_error=None,
        ),
    )
|
|
905
|
+
|
|
906
|
+
|
|
907
|
+
def _select_managed_port() -> int:
    """Pick the first reserved port that no managed container claims and that can be bound locally.

    Raises:
        InitConflictError: When every port in the reserved range is claimed or cannot be bound.
    """

    taken = _managed_claimed_host_ports()
    candidates = (
        candidate
        for candidate in range(_MANAGED_PORT_START, _MANAGED_PORT_END + 1)
        if candidate not in taken
    )
    for candidate in candidates:
        # Probe by binding a throwaway socket; it is closed again when the block exits.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            try:
                probe.bind((_MANAGED_HOST, candidate))
            except OSError:
                continue
            else:
                return candidate
    raise InitConflictError("No free reserved port is available for the managed Shellbrain Postgres instance.")
|
|
922
|
+
|
|
923
|
+
|
|
924
|
+
def _replace_database(dsn: str, db_name: str) -> str:
    """Replace the database path component of a DSN.

    The DSN is parsed as a URL, so query parameters (for example
    ``?sslmode=require``) are kept and a DSN without any path gains one.

    Args:
        dsn: URL-style DSN such as ``postgresql://user:pw@host:5432/db``; reserved
            characters in the credentials are expected to be percent-encoded.
        db_name: Database name to place in the path; it is inserted as given.

    Returns:
        The DSN pointing at ``db_name`` with every other component unchanged.
    """

    from urllib.parse import urlsplit

    # Splitting on the last "/" would drop the query string and mangle a DSN that
    # has no path, so swap only the parsed path component.
    parts = urlsplit(dsn)
    return parts._replace(path=f"/{db_name}").geturl()
|
|
929
|
+
|
|
930
|
+
|
|
931
|
+
def _home_hash() -> str:
    """Return the first 16 hex characters of the SHA-256 of the Shellbrain home path."""

    import hashlib

    home_bytes = str(get_shellbrain_home()).encode("utf-8")
    digest = hashlib.sha256(home_bytes).hexdigest()
    return digest[:16]
|
|
937
|
+
|
|
938
|
+
|
|
939
|
+
def _managed_claimed_host_ports() -> set[int]:
    """Collect the host ports claimed by every container carrying the managed label.

    Returns an empty set when the container listing fails; containers that
    cannot be inspected are skipped.
    """

    listing = subprocess.run(
        ["docker", "ps", "-a", "--filter", f"label={_MANAGED_LABEL}=true", "--format", "{{.Names}}"],
        capture_output=True,
        text=True,
        check=False,
    )
    if listing.returncode != 0:
        return set()
    claimed: set[int] = set()
    for raw_name in listing.stdout.splitlines():
        name = raw_name.strip()
        # Blank lines carry no container name and are never inspected.
        info = _inspect_container(name) if name else None
        if info is not None:
            claimed.update(_container_host_ports(info))
    return claimed
|
|
967
|
+
|
|
968
|
+
|
|
969
|
+
def _container_host_ports(info: dict[str, object]) -> set[int]:
    """Extract declared host ports from one Docker inspect payload.

    Reads the ``HostPort`` of every binding under ``HostConfig.PortBindings``
    and under ``NetworkSettings.Ports``.

    Args:
        info: One inspect payload; sections that are missing, null, or not
            shaped as expected are skipped rather than raising.

    Returns:
        Every all-digit ``HostPort`` string found, converted to ``int``.
    """

    # Both locations share one shape: {container_port: [{"HostPort": "..."}, ...] or null}.
    binding_sources = (
        ("HostConfig", "PortBindings"),
        ("NetworkSettings", "Ports"),
    )
    ports: set[int] = set()
    for section_key, mapping_key in binding_sources:
        section = info.get(section_key)
        if not isinstance(section, dict):
            continue
        port_map = section.get(mapping_key)
        if not isinstance(port_map, dict):
            continue
        for bindings in port_map.values():
            # Non-list values (including null) carry no host binding.
            if not isinstance(bindings, list):
                continue
            for binding in bindings:
                if not isinstance(binding, dict):
                    continue
                host_port = binding.get("HostPort")
                if isinstance(host_port, str) and host_port.isdigit():
                    ports.add(int(host_port))
    return ports
|