shellbrain 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app/__init__.py +1 -0
- app/__main__.py +7 -0
- app/boot/__init__.py +1 -0
- app/boot/admin_db.py +88 -0
- app/boot/config.py +14 -0
- app/boot/create_policy.py +52 -0
- app/boot/db.py +70 -0
- app/boot/embeddings.py +55 -0
- app/boot/home.py +45 -0
- app/boot/migrations.py +61 -0
- app/boot/read_policy.py +179 -0
- app/boot/repos.py +15 -0
- app/boot/retrieval.py +3 -0
- app/boot/thresholds.py +19 -0
- app/boot/update_policy.py +34 -0
- app/boot/use_cases.py +22 -0
- app/config/__init__.py +1 -0
- app/config/defaults/create_policy.yaml +7 -0
- app/config/defaults/read_policy.yaml +25 -0
- app/config/defaults/runtime.yaml +10 -0
- app/config/defaults/thresholds.yaml +3 -0
- app/config/defaults/update_policy.yaml +5 -0
- app/config/loader.py +58 -0
- app/core/__init__.py +1 -0
- app/core/contracts/__init__.py +1 -0
- app/core/contracts/errors.py +29 -0
- app/core/contracts/requests.py +211 -0
- app/core/contracts/responses.py +15 -0
- app/core/entities/__init__.py +1 -0
- app/core/entities/associations.py +58 -0
- app/core/entities/episodes.py +66 -0
- app/core/entities/evidence.py +29 -0
- app/core/entities/facts.py +30 -0
- app/core/entities/guidance.py +47 -0
- app/core/entities/identity.py +48 -0
- app/core/entities/memory.py +34 -0
- app/core/entities/runtime_context.py +19 -0
- app/core/entities/session_state.py +31 -0
- app/core/entities/telemetry.py +152 -0
- app/core/entities/utility.py +14 -0
- app/core/interfaces/__init__.py +1 -0
- app/core/interfaces/clock.py +12 -0
- app/core/interfaces/config.py +28 -0
- app/core/interfaces/embeddings.py +12 -0
- app/core/interfaces/idgen.py +11 -0
- app/core/interfaces/repos.py +279 -0
- app/core/interfaces/retrieval.py +20 -0
- app/core/interfaces/session_state_store.py +33 -0
- app/core/interfaces/unit_of_work.py +50 -0
- app/core/policies/__init__.py +1 -0
- app/core/policies/_shared/__init__.py +1 -0
- app/core/policies/_shared/executor.py +132 -0
- app/core/policies/_shared/side_effects.py +9 -0
- app/core/policies/create_policy/__init__.py +1 -0
- app/core/policies/create_policy/pipeline.py +96 -0
- app/core/policies/read_policy/__init__.py +1 -0
- app/core/policies/read_policy/bm25.py +114 -0
- app/core/policies/read_policy/context_pack_builder.py +140 -0
- app/core/policies/read_policy/expansion.py +132 -0
- app/core/policies/read_policy/fusion_rrf.py +34 -0
- app/core/policies/read_policy/lexical_query.py +101 -0
- app/core/policies/read_policy/pipeline.py +93 -0
- app/core/policies/read_policy/scenario_lift.py +11 -0
- app/core/policies/read_policy/scoring.py +61 -0
- app/core/policies/read_policy/seed_retrieval.py +54 -0
- app/core/policies/read_policy/utility_prior.py +11 -0
- app/core/policies/update_policy/__init__.py +1 -0
- app/core/policies/update_policy/pipeline.py +80 -0
- app/core/use_cases/__init__.py +1 -0
- app/core/use_cases/build_guidance.py +85 -0
- app/core/use_cases/create_memory.py +26 -0
- app/core/use_cases/manage_session_state.py +159 -0
- app/core/use_cases/read_memory.py +21 -0
- app/core/use_cases/record_episode_sync_telemetry.py +19 -0
- app/core/use_cases/record_operation_telemetry.py +32 -0
- app/core/use_cases/sync_episode.py +162 -0
- app/core/use_cases/update_memory.py +40 -0
- app/migrations/__init__.py +1 -0
- app/migrations/env.py +65 -0
- app/migrations/versions/20260226_0001_initial_schema.py +232 -0
- app/migrations/versions/20260312_0002_add_hard_invariants.py +60 -0
- app/migrations/versions/20260312_0003_drop_create_confidence.py +40 -0
- app/migrations/versions/20260313_0004_episode_sync_hardening.py +71 -0
- app/migrations/versions/20260313_0005_evidence_episode_event_refs.py +45 -0
- app/migrations/versions/20260318_0006_usage_telemetry_schema.py +175 -0
- app/migrations/versions/20260319_0007_identity_session_guidance.py +49 -0
- app/migrations/versions/20260320_0008_instance_metadata_and_backup_safety.py +31 -0
- app/migrations/versions/__init__.py +1 -0
- app/periphery/__init__.py +1 -0
- app/periphery/admin/__init__.py +1 -0
- app/periphery/admin/backup.py +360 -0
- app/periphery/admin/destructive_guard.py +32 -0
- app/periphery/admin/doctor.py +192 -0
- app/periphery/admin/init.py +996 -0
- app/periphery/admin/instance_guard.py +211 -0
- app/periphery/admin/machine_state.py +354 -0
- app/periphery/admin/privileges.py +42 -0
- app/periphery/admin/repo_state.py +266 -0
- app/periphery/admin/restore.py +30 -0
- app/periphery/cli/__init__.py +1 -0
- app/periphery/cli/handlers.py +830 -0
- app/periphery/cli/hydration.py +119 -0
- app/periphery/cli/main.py +710 -0
- app/periphery/cli/presenter_json.py +10 -0
- app/periphery/cli/schema_validation.py +201 -0
- app/periphery/db/__init__.py +1 -0
- app/periphery/db/engine.py +10 -0
- app/periphery/db/models/__init__.py +1 -0
- app/periphery/db/models/associations.py +55 -0
- app/periphery/db/models/episodes.py +55 -0
- app/periphery/db/models/evidence.py +19 -0
- app/periphery/db/models/experiences.py +33 -0
- app/periphery/db/models/instance_metadata.py +17 -0
- app/periphery/db/models/memories.py +39 -0
- app/periphery/db/models/metadata.py +6 -0
- app/periphery/db/models/registry.py +18 -0
- app/periphery/db/models/telemetry.py +174 -0
- app/periphery/db/models/utility.py +19 -0
- app/periphery/db/models/views.py +154 -0
- app/periphery/db/repos/__init__.py +1 -0
- app/periphery/db/repos/relational/__init__.py +1 -0
- app/periphery/db/repos/relational/associations_repo.py +117 -0
- app/periphery/db/repos/relational/episodes_repo.py +188 -0
- app/periphery/db/repos/relational/evidence_repo.py +82 -0
- app/periphery/db/repos/relational/experiences_repo.py +41 -0
- app/periphery/db/repos/relational/memories_repo.py +99 -0
- app/periphery/db/repos/relational/read_policy_repo.py +202 -0
- app/periphery/db/repos/relational/telemetry_repo.py +161 -0
- app/periphery/db/repos/relational/utility_repo.py +30 -0
- app/periphery/db/repos/semantic/__init__.py +1 -0
- app/periphery/db/repos/semantic/keyword_retrieval_repo.py +63 -0
- app/periphery/db/repos/semantic/semantic_retrieval_repo.py +111 -0
- app/periphery/db/session.py +10 -0
- app/periphery/db/uow.py +75 -0
- app/periphery/embeddings/__init__.py +1 -0
- app/periphery/embeddings/local_provider.py +35 -0
- app/periphery/embeddings/query_vector_search.py +18 -0
- app/periphery/episodes/__init__.py +1 -0
- app/periphery/episodes/claude_code.py +387 -0
- app/periphery/episodes/codex.py +423 -0
- app/periphery/episodes/launcher.py +66 -0
- app/periphery/episodes/normalization.py +31 -0
- app/periphery/episodes/poller.py +299 -0
- app/periphery/episodes/source_discovery.py +66 -0
- app/periphery/episodes/tool_filter.py +165 -0
- app/periphery/identity/__init__.py +1 -0
- app/periphery/identity/claude_hook_install.py +67 -0
- app/periphery/identity/claude_runtime.py +83 -0
- app/periphery/identity/codex_runtime.py +32 -0
- app/periphery/identity/compatibility.py +38 -0
- app/periphery/identity/resolver.py +163 -0
- app/periphery/session_state/__init__.py +1 -0
- app/periphery/session_state/file_store.py +100 -0
- app/periphery/telemetry/__init__.py +33 -0
- app/periphery/telemetry/operation_summary.py +299 -0
- app/periphery/telemetry/session_selection.py +156 -0
- app/periphery/telemetry/sync_summary.py +65 -0
- app/periphery/validation/__init__.py +1 -0
- app/periphery/validation/integrity_validation.py +253 -0
- app/periphery/validation/semantic_validation.py +94 -0
- shellbrain-0.1.0.dist-info/METADATA +130 -0
- shellbrain-0.1.0.dist-info/RECORD +165 -0
- shellbrain-0.1.0.dist-info/WHEEL +5 -0
- shellbrain-0.1.0.dist-info/entry_points.txt +2 -0
- shellbrain-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Storage interface for repo-local per-caller working state."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from collections.abc import Sequence
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from app.core.entities.session_state import SessionState
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ISessionStateStore(ABC):
    """Interface every repo-local session-state store must implement.

    All operations are keyword-only and are scoped to a single ``repo_root``.
    """

    @abstractmethod
    def load(self, *, repo_root: Path, caller_id: str) -> SessionState | None:
        """Fetch the state stored for ``caller_id``; ``None`` when there is none."""

    @abstractmethod
    def save(self, *, repo_root: Path, state: SessionState) -> None:
        """Store ``state`` as the current state of its caller."""

    @abstractmethod
    def delete(self, *, repo_root: Path, caller_id: str) -> None:
        """Remove the state of ``caller_id``; a missing state is not an error."""

    # NOTE(review): this method name shadows the builtin ``list`` inside the
    # class body. The ``list[str]`` annotation on ``gc`` below is only harmless
    # at runtime because the module enables lazy annotations; static checkers
    # may still flag it — confirm before renaming anything.
    @abstractmethod
    def list(self, *, repo_root: Path) -> Sequence[SessionState]:
        """Return every caller state stored under ``repo_root``."""

    @abstractmethod
    def gc(self, *, repo_root: Path, older_than_iso: str) -> list[str]:
        """Purge caller states last seen before ``older_than_iso``.

        Returns the caller ids whose state was deleted.
        """
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""This module defines the unit-of-work interface used to enforce transaction boundaries."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from typing import Self
|
|
5
|
+
|
|
6
|
+
from app.core.interfaces.repos import (
|
|
7
|
+
IAssociationsRepo,
|
|
8
|
+
IEpisodesRepo,
|
|
9
|
+
IEvidenceRepo,
|
|
10
|
+
IExperiencesRepo,
|
|
11
|
+
IKeywordRetrievalRepo,
|
|
12
|
+
IMemoriesRepo,
|
|
13
|
+
IReadPolicyRepo,
|
|
14
|
+
ISemanticRetrievalRepo,
|
|
15
|
+
ITelemetryRepo,
|
|
16
|
+
IUtilityRepo,
|
|
17
|
+
)
|
|
18
|
+
from app.core.interfaces.retrieval import IVectorSearch
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class IUnitOfWork(ABC):
    """Transactional gateway that exposes every repository behind one scope.

    Implementations are used as context managers: entering opens the
    transaction scope, exiting finishes it with commit-or-rollback behavior.
    """

    # Relational repositories.
    memories: IMemoriesRepo
    experiences: IExperiencesRepo
    associations: IAssociationsRepo
    utility: IUtilityRepo
    episodes: IEpisodesRepo
    evidence: IEvidenceRepo
    # Retrieval-oriented repositories.
    semantic_retrieval: ISemanticRetrievalRepo
    keyword_retrieval: IKeywordRetrievalRepo
    read_policy: IReadPolicyRepo
    telemetry: ITelemetryRepo
    # Optional: may be None when no vector search backend is attached.
    vector_search: IVectorSearch | None

    @abstractmethod
    def __enter__(self) -> Self:
        """Open the transaction scope and hand back this unit of work."""

    @abstractmethod
    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Leave the transaction scope, committing or rolling back as appropriate."""

    @abstractmethod
    def commit(self) -> None:
        """Commit the transaction that is currently open."""

    @abstractmethod
    def rollback(self) -> None:
        """Roll back the transaction that is currently open."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""This package defines core create, read, and update policy packages."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""This package defines minimal shared internals for create and update policies."""
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""This module defines shared side-effect execution for create and update policies."""
|
|
2
|
+
|
|
3
|
+
from app.core.entities.associations import (
|
|
4
|
+
AssociationEdge,
|
|
5
|
+
AssociationObservation,
|
|
6
|
+
AssociationRelationType,
|
|
7
|
+
AssociationSourceMode,
|
|
8
|
+
AssociationState,
|
|
9
|
+
)
|
|
10
|
+
from app.core.entities.facts import FactUpdate, ProblemAttempt, ProblemAttemptRole
|
|
11
|
+
from app.core.entities.memory import Memory, MemoryKind, MemoryScope
|
|
12
|
+
from app.core.entities.utility import UtilityObservation
|
|
13
|
+
from app.core.interfaces.embeddings import IEmbeddingProvider
|
|
14
|
+
from app.core.interfaces.unit_of_work import IUnitOfWork
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def apply_side_effects(
|
|
18
|
+
plan: list[dict[str, object]],
|
|
19
|
+
uow: IUnitOfWork,
|
|
20
|
+
*,
|
|
21
|
+
embedding_provider: IEmbeddingProvider | None = None,
|
|
22
|
+
) -> None:
|
|
23
|
+
"""This function executes a deterministic side-effect plan inside one transaction."""
|
|
24
|
+
|
|
25
|
+
for effect in plan:
|
|
26
|
+
effect_type = str(effect["effect_type"])
|
|
27
|
+
params = effect["params"]
|
|
28
|
+
assert isinstance(params, dict)
|
|
29
|
+
|
|
30
|
+
if effect_type == "memory.create":
|
|
31
|
+
uow.memories.create(
|
|
32
|
+
Memory(
|
|
33
|
+
id=str(params["memory_id"]),
|
|
34
|
+
repo_id=str(params["repo_id"]),
|
|
35
|
+
scope=MemoryScope(str(params["scope"])),
|
|
36
|
+
kind=MemoryKind(str(params["kind"])),
|
|
37
|
+
text=str(params["text"]),
|
|
38
|
+
)
|
|
39
|
+
)
|
|
40
|
+
continue
|
|
41
|
+
|
|
42
|
+
if effect_type == "memory_embedding.upsert":
|
|
43
|
+
if embedding_provider is None:
|
|
44
|
+
raise RuntimeError("Embedding provider is required for memory_embedding.upsert")
|
|
45
|
+
uow.memories.upsert_embedding(
|
|
46
|
+
memory_id=str(params["memory_id"]),
|
|
47
|
+
model=str(params["model"]),
|
|
48
|
+
vector=embedding_provider.embed(str(params["text"])),
|
|
49
|
+
)
|
|
50
|
+
continue
|
|
51
|
+
|
|
52
|
+
if effect_type == "memory_evidence.attach":
|
|
53
|
+
refs = params["refs"]
|
|
54
|
+
assert isinstance(refs, list)
|
|
55
|
+
for ref in refs:
|
|
56
|
+
evidence = uow.evidence.upsert_ref(repo_id=str(params["repo_id"]), ref=str(ref))
|
|
57
|
+
uow.evidence.link_memory_evidence(memory_id=str(params["memory_id"]), evidence_id=evidence.id)
|
|
58
|
+
continue
|
|
59
|
+
|
|
60
|
+
if effect_type == "problem_attempt.create":
|
|
61
|
+
uow.experiences.create_problem_attempt(
|
|
62
|
+
ProblemAttempt(
|
|
63
|
+
problem_id=str(params["problem_id"]),
|
|
64
|
+
attempt_id=str(params["attempt_id"]),
|
|
65
|
+
role=ProblemAttemptRole(str(params["role"])),
|
|
66
|
+
)
|
|
67
|
+
)
|
|
68
|
+
continue
|
|
69
|
+
|
|
70
|
+
if effect_type == "memory.archive_state":
|
|
71
|
+
updated = uow.memories.set_archived(memory_id=str(params["memory_id"]), archived=bool(params["archived"]))
|
|
72
|
+
if not updated:
|
|
73
|
+
raise LookupError(f"Target shellbrain not found for archive update: {params['memory_id']}")
|
|
74
|
+
continue
|
|
75
|
+
|
|
76
|
+
if effect_type == "utility_observation.append":
|
|
77
|
+
uow.utility.append_observation(
|
|
78
|
+
UtilityObservation(
|
|
79
|
+
id=str(params["id"]),
|
|
80
|
+
memory_id=str(params["memory_id"]),
|
|
81
|
+
problem_id=str(params["problem_id"]),
|
|
82
|
+
vote=float(params["vote"]),
|
|
83
|
+
rationale=str(params["rationale"]) if params.get("rationale") is not None else None,
|
|
84
|
+
)
|
|
85
|
+
)
|
|
86
|
+
continue
|
|
87
|
+
|
|
88
|
+
if effect_type == "fact_update.create":
|
|
89
|
+
uow.experiences.create_fact_update(
|
|
90
|
+
FactUpdate(
|
|
91
|
+
id=str(params["id"]),
|
|
92
|
+
old_fact_id=str(params["old_fact_id"]),
|
|
93
|
+
change_id=str(params["change_id"]),
|
|
94
|
+
new_fact_id=str(params["new_fact_id"]),
|
|
95
|
+
)
|
|
96
|
+
)
|
|
97
|
+
continue
|
|
98
|
+
|
|
99
|
+
if effect_type == "association.upsert_and_observe":
|
|
100
|
+
edge = uow.associations.upsert_edge(
|
|
101
|
+
AssociationEdge(
|
|
102
|
+
id=str(params["edge_id"]),
|
|
103
|
+
repo_id=str(params["repo_id"]),
|
|
104
|
+
from_memory_id=str(params["from_memory_id"]),
|
|
105
|
+
to_memory_id=str(params["to_memory_id"]),
|
|
106
|
+
relation_type=AssociationRelationType(str(params["relation_type"])),
|
|
107
|
+
source_mode=AssociationSourceMode(str(params["source_mode"])),
|
|
108
|
+
state=AssociationState(str(params["state"])),
|
|
109
|
+
strength=float(params["strength"]),
|
|
110
|
+
)
|
|
111
|
+
)
|
|
112
|
+
uow.associations.append_observation(
|
|
113
|
+
AssociationObservation(
|
|
114
|
+
id=str(params["observation_id"]),
|
|
115
|
+
repo_id=str(params["repo_id"]),
|
|
116
|
+
edge_id=edge.id,
|
|
117
|
+
from_memory_id=str(params["from_memory_id"]),
|
|
118
|
+
to_memory_id=str(params["to_memory_id"]),
|
|
119
|
+
relation_type=AssociationRelationType(str(params["relation_type"])),
|
|
120
|
+
source=str(params["observation_source"]),
|
|
121
|
+
valence=float(params["valence"]),
|
|
122
|
+
salience=float(params["salience"]),
|
|
123
|
+
)
|
|
124
|
+
)
|
|
125
|
+
evidence_refs = params.get("evidence_refs", [])
|
|
126
|
+
assert isinstance(evidence_refs, list)
|
|
127
|
+
for ref in evidence_refs:
|
|
128
|
+
evidence = uow.evidence.upsert_ref(repo_id=str(params["repo_id"]), ref=str(ref))
|
|
129
|
+
uow.evidence.link_association_edge_evidence(edge_id=edge.id, evidence_id=evidence.id)
|
|
130
|
+
continue
|
|
131
|
+
|
|
132
|
+
raise ValueError(f"Unsupported side effect type: {effect_type}")
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""This module defines shared side-effect descriptor helpers."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def make_side_effect(effect_type: str, params: dict[str, Any]) -> dict[str, Any]:
    """Build a side-effect descriptor with the keys ``effect_type`` and ``params``.

    ``params`` is stored by reference, not copied.
    """

    descriptor: dict[str, Any] = {}
    descriptor["effect_type"] = effect_type
    descriptor["params"] = params
    return descriptor
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""This package defines deterministic create-policy planning and execution."""
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""This module defines create-policy planning and execution helpers."""
|
|
2
|
+
|
|
3
|
+
from uuid import uuid4
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from app.core.entities.associations import AssociationSourceMode, AssociationState
|
|
7
|
+
from app.core.entities.memory import MemoryKind
|
|
8
|
+
from app.core.interfaces.embeddings import IEmbeddingProvider
|
|
9
|
+
from app.core.interfaces.unit_of_work import IUnitOfWork
|
|
10
|
+
from app.core.policies._shared.executor import apply_side_effects
|
|
11
|
+
from app.core.policies._shared.side_effects import make_side_effect
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def build_create_plan(payload: dict[str, Any], *, embedding_model: str = "unknown") -> list[dict[str, Any]]:
    """Turn a create payload into the ordered side effects that realize it.

    The plan always opens with ``memory.create``, ``memory_embedding.upsert`` and
    ``memory_evidence.attach``. A ``problem_attempt.create`` effect follows when
    the memory is a solution or failed tactic that links to a problem, and one
    ``association.upsert_and_observe`` effect is appended per linked association.
    Edge and observation ids are freshly generated UUID4 strings.
    """

    memory = payload["memory"]
    repo_id = payload["repo_id"]
    memory_id = payload["memory_id"]

    create_effect = make_side_effect(
        "memory.create",
        {
            "memory_id": memory_id,
            "repo_id": repo_id,
            "scope": memory["scope"],
            "kind": memory["kind"],
            "text": memory["text"],
        },
    )
    embedding_effect = make_side_effect(
        "memory_embedding.upsert",
        {"memory_id": memory_id, "model": embedding_model, "text": memory["text"]},
    )
    evidence_effect = make_side_effect(
        "memory_evidence.attach",
        {"memory_id": memory_id, "repo_id": repo_id, "refs": list(memory["evidence_refs"])},
    )
    plan: list[dict[str, Any]] = [create_effect, embedding_effect, evidence_effect]

    # A missing or None "links" entry is treated as an empty mapping.
    links = memory.get("links") or {}

    problem_id = links.get("problem_id")
    attempt_kinds = {MemoryKind.SOLUTION.value, MemoryKind.FAILED_TACTIC.value}
    if memory["kind"] in attempt_kinds and problem_id:
        # The new memory itself is the attempt; its kind doubles as the role.
        attempt_params = {
            "problem_id": problem_id,
            "attempt_id": memory_id,
            "role": memory["kind"],
        }
        plan.append(make_side_effect("problem_attempt.create", attempt_params))

    for link in links.get("associations", []):
        confidence = link.get("confidence")
        salience = link.get("salience")
        # Unspecified confidence/salience fall back to the neutral value 0.5.
        confidence_or_default = 0.5 if confidence is None else confidence
        salience_or_default = 0.5 if salience is None else salience
        association_params = {
            "repo_id": repo_id,
            "edge_id": str(uuid4()),
            "from_memory_id": memory_id,
            "to_memory_id": link["to_memory_id"],
            "relation_type": link["relation_type"],
            "source_mode": AssociationSourceMode.AGENT.value,
            "state": AssociationState.TENTATIVE.value,
            "strength": confidence_or_default,
            "observation_id": str(uuid4()),
            "observation_source": "agent_explicit",
            "valence": confidence_or_default,
            "salience": salience_or_default,
            "evidence_refs": list(memory["evidence_refs"]),
        }
        plan.append(make_side_effect("association.upsert_and_observe", association_params))

    return plan
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def apply_create_plan(
    plan: list[dict[str, Any]],
    uow: IUnitOfWork,
    *,
    embedding_provider: IEmbeddingProvider,
) -> None:
    """Run a create plan through the shared side-effect executor.

    ``embedding_provider`` is a required keyword argument and is passed on to
    ``apply_side_effects`` unchanged.
    """

    apply_side_effects(
        plan,
        uow,
        embedding_provider=embedding_provider,
    )
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""This package defines read-policy retrieval and context-pack assembly stages."""
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""Helpers for scoring lexical candidates with Okapi BM25 and coverage-aware admission."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections import Counter
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from math import log
|
|
8
|
+
from typing import Literal, Sequence
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
_K1 = 1.2
|
|
12
|
+
_B = 0.75
|
|
13
|
+
_COVERAGE_THRESHOLD_BY_MODE = {
|
|
14
|
+
"targeted": 0.65,
|
|
15
|
+
"ambient": 0.80,
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass(frozen=True, slots=True)
|
|
20
|
+
class BM25Document:
|
|
21
|
+
"""Normalized document representation used for BM25 scoring."""
|
|
22
|
+
|
|
23
|
+
memory_id: str
|
|
24
|
+
terms: tuple[str, ...]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass(frozen=True, slots=True)
|
|
28
|
+
class BM25ScoredDocument:
|
|
29
|
+
"""Scored lexical candidate with query-coverage metadata."""
|
|
30
|
+
|
|
31
|
+
memory_id: str
|
|
32
|
+
score: float
|
|
33
|
+
coverage: float
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def score_documents(
|
|
37
|
+
query_terms: Sequence[str],
|
|
38
|
+
documents: Sequence[BM25Document],
|
|
39
|
+
*,
|
|
40
|
+
k1: float = _K1,
|
|
41
|
+
b: float = _B,
|
|
42
|
+
) -> list[BM25ScoredDocument]:
|
|
43
|
+
"""Return BM25 scores and weighted query coverage for partially matching documents."""
|
|
44
|
+
|
|
45
|
+
normalized_query_terms = tuple(query_terms)
|
|
46
|
+
if not normalized_query_terms or not documents:
|
|
47
|
+
return []
|
|
48
|
+
|
|
49
|
+
term_frequencies = {document.memory_id: Counter(document.terms) for document in documents}
|
|
50
|
+
corpus_size = len(documents)
|
|
51
|
+
average_length = sum(len(document.terms) for document in documents) / corpus_size
|
|
52
|
+
if average_length <= 0.0:
|
|
53
|
+
return []
|
|
54
|
+
|
|
55
|
+
inverse_document_frequencies = {
|
|
56
|
+
term: _inverse_document_frequency(
|
|
57
|
+
sum(1 for document in documents if term in term_frequencies[document.memory_id]),
|
|
58
|
+
corpus_size,
|
|
59
|
+
)
|
|
60
|
+
for term in normalized_query_terms
|
|
61
|
+
}
|
|
62
|
+
total_query_weight = sum(inverse_document_frequencies.values())
|
|
63
|
+
if total_query_weight <= 0.0:
|
|
64
|
+
return []
|
|
65
|
+
|
|
66
|
+
scored: list[BM25ScoredDocument] = []
|
|
67
|
+
for document in documents:
|
|
68
|
+
frequencies = term_frequencies[document.memory_id]
|
|
69
|
+
matched_terms = tuple(term for term in normalized_query_terms if term in frequencies)
|
|
70
|
+
if not matched_terms:
|
|
71
|
+
continue
|
|
72
|
+
|
|
73
|
+
score = 0.0
|
|
74
|
+
matched_query_weight = 0.0
|
|
75
|
+
document_length = len(document.terms)
|
|
76
|
+
normalization = k1 * (1 - b + b * document_length / average_length)
|
|
77
|
+
for term in matched_terms:
|
|
78
|
+
inverse_document_frequency = inverse_document_frequencies[term]
|
|
79
|
+
term_frequency = frequencies[term]
|
|
80
|
+
score += inverse_document_frequency * ((term_frequency * (k1 + 1)) / (term_frequency + normalization))
|
|
81
|
+
matched_query_weight += inverse_document_frequency
|
|
82
|
+
|
|
83
|
+
if score > 0.0:
|
|
84
|
+
scored.append(
|
|
85
|
+
BM25ScoredDocument(
|
|
86
|
+
memory_id=document.memory_id,
|
|
87
|
+
score=score,
|
|
88
|
+
coverage=matched_query_weight / total_query_weight,
|
|
89
|
+
)
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
return sorted(scored, key=lambda item: (-item.score, item.memory_id))
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def admit_scored_documents(
|
|
96
|
+
scored_documents: Sequence[BM25ScoredDocument],
|
|
97
|
+
*,
|
|
98
|
+
mode: Literal["ambient", "targeted"],
|
|
99
|
+
) -> list[dict[str, object]]:
|
|
100
|
+
"""Apply the mode-aware weighted query-coverage gate to scored lexical candidates."""
|
|
101
|
+
|
|
102
|
+
threshold = _COVERAGE_THRESHOLD_BY_MODE[mode]
|
|
103
|
+
admitted = [
|
|
104
|
+
{"memory_id": document.memory_id, "score": document.score}
|
|
105
|
+
for document in scored_documents
|
|
106
|
+
if document.coverage >= threshold
|
|
107
|
+
]
|
|
108
|
+
return sorted(admitted, key=lambda item: (-float(item["score"]), str(item["memory_id"])))
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _inverse_document_frequency(document_frequency: int, corpus_size: int) -> float:
|
|
112
|
+
"""Return the BM25 IDF weight for one query term in the current visible corpus."""
|
|
113
|
+
|
|
114
|
+
return log(1 + (corpus_size - document_frequency + 0.5) / (document_frequency + 0.5))
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""This module defines bounded context-pack assembly with quotas, dedupe, and hard caps."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from app.boot.read_policy import resolve_read_limit, resolve_read_quotas
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Candidate buckets, in the order they are filled and displayed.
_BUCKET_ORDER = ("direct", "explicit", "implicit")
# Output section key for each internal bucket name.
_SECTION_NAMES = dict(
    direct="direct",
    explicit="explicit_related",
    implicit="implicit_related",
)
# Position of each bucket within _BUCKET_ORDER (0 comes first).
_BUCKET_PRIORITY = {name: rank for rank, name in enumerate(_BUCKET_ORDER)}
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def assemble_context_pack(scored_candidates: dict[str, list[dict[str, Any]]], payload: dict[str, Any]) -> dict[str, Any]:
    """Build the final context pack from per-bucket scored candidates.

    Selection runs in two passes. First each bucket, in ``_BUCKET_ORDER``, takes
    its best candidates up to its quota (capped by what is left of the overall
    limit). Candidates beyond the quota go to a shared spill pool, which then
    fills any remaining room by descending score, bucket priority and memory
    id. A memory id is selected at most once across all buckets.

    Returns a dict with a ``meta`` entry (mode, query, limit, per-section
    counts) followed by the ``direct``, ``explicit_related`` and
    ``implicit_related`` item lists.
    """

    mode = str(payload["mode"])
    limit = resolve_read_limit(mode=mode, explicit_limit=payload.get("limit"))
    quotas = resolve_read_quotas(mode=mode)

    def best_first(candidate: dict[str, Any]) -> tuple[float, str]:
        # Highest score first; memory id breaks ties deterministically.
        return (-float(candidate["score"]), str(candidate["memory_id"]))

    ranked = {bucket: sorted(scored_candidates.get(bucket, []), key=best_first) for bucket in _BUCKET_ORDER}
    chosen: dict[str, list[dict[str, Any]]] = {bucket: [] for bucket in _BUCKET_ORDER}
    taken_ids: set[str] = set()
    overflow: list[tuple[str, dict[str, Any]]] = []
    budget = limit

    # Pass 1: fill each bucket up to its quota.
    for bucket in _BUCKET_ORDER:
        quota = min(int(quotas.get(bucket, 0)), budget)
        filled = 0
        for candidate in ranked[bucket]:
            memory_id = str(candidate["memory_id"])
            if memory_id in taken_ids:
                continue
            if filled >= quota:
                overflow.append((bucket, candidate))
                continue
            taken_ids.add(memory_id)
            chosen[bucket].append(candidate)
            filled += 1
            budget -= 1

    # Pass 2: spend whatever budget is left on the best over-quota candidates.
    if budget > 0:
        overflow.sort(
            key=lambda entry: (
                -float(entry[1]["score"]),
                _BUCKET_PRIORITY[entry[0]],
                str(entry[1]["memory_id"]),
            )
        )
        for bucket, candidate in overflow:
            if budget <= 0:
                break
            memory_id = str(candidate["memory_id"])
            if memory_id in taken_ids:
                continue
            taken_ids.add(memory_id)
            chosen[bucket].append(candidate)
            budget -= 1

    sections = {
        _SECTION_NAMES[bucket]: [_shape_item(candidate, bucket) for candidate in chosen[bucket]]
        for bucket in _BUCKET_ORDER
    }
    _assign_priorities(sections)

    meta = {
        "mode": mode,
        "query": payload.get("query"),
        "limit": limit,
        "counts": {section_name: len(items) for section_name, items in sections.items()},
    }
    return {"meta": meta, **sections}
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _shape_item(candidate: dict[str, Any], bucket_name: str) -> dict[str, Any]:
    """Reduce an internal candidate to the compact item shape shown to the LLM.

    ``memory_id`` and ``why_included`` are always present. ``kind``, ``text``
    and ``relation_type`` are copied only when the candidate has them, and
    ``anchor_memory_id`` additionally only outside the ``direct`` bucket.
    """

    shaped: dict[str, Any] = {
        "memory_id": str(candidate["memory_id"]),
        "why_included": _resolve_why_included(candidate, bucket_name),
    }
    # (key, converter, allowed in this bucket) — listed in output key order.
    optional_fields = (
        ("kind", _normalize_kind, True),
        ("text", str, True),
        ("anchor_memory_id", str, bucket_name != "direct"),
        ("relation_type", str, True),
    )
    for key, convert, allowed in optional_fields:
        if allowed and key in candidate:
            shaped[key] = convert(candidate[key])
    return shaped
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _resolve_why_included(candidate: dict[str, Any], bucket_name: str) -> str:
    """Pick the inclusion reason reported for one candidate.

    An explicit ``why_included`` on the candidate wins. Otherwise the ``direct``
    and ``implicit`` buckets map to fixed reasons, and any other bucket derives
    the reason from the candidate's ``expansion_type``, falling back to
    ``"related_memory"`` when that is missing or empty.
    """

    if "why_included" in candidate:
        return str(candidate["why_included"])

    fixed_reasons = {"direct": "direct_match", "implicit": "semantic_neighbor"}
    if bucket_name in fixed_reasons:
        return fixed_reasons[bucket_name]

    expansion_type = str(candidate.get("expansion_type", ""))
    known_reasons = {
        "problem_attempt": "problem_attempt",
        "fact_update": "fact_update",
        "association": "association_link",
    }
    if expansion_type in known_reasons:
        return known_reasons[expansion_type]
    # Unknown expansion types pass through unchanged; an empty one gets the generic reason.
    return expansion_type if expansion_type else "related_memory"
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _normalize_kind(kind: Any) -> str:
    """Return ``kind`` as plain text, unwrapping a ``.value`` attribute when present."""

    try:
        raw = kind.value
    except AttributeError:
        # No ``value`` attribute: use the object itself.
        raw = kind
    return str(raw)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _assign_priorities(sections: dict[str, list[dict[str, Any]]]) -> None:
    """Number the displayed items 1..N in place, section by section.

    Items are numbered through ``direct``, then ``explicit_related``, then
    ``implicit_related``; each item receives its number under ``"priority"``.
    """

    display_order = ("direct", "explicit_related", "implicit_related")
    # Lazy on purpose: each section is looked up only once numbering reaches it.
    ordered_items = (item for section_name in display_order for item in sections[section_name])
    for priority, item in enumerate(ordered_items, start=1):
        item["priority"] = priority
|