shellbrain 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. app/__init__.py +1 -0
  2. app/__main__.py +7 -0
  3. app/boot/__init__.py +1 -0
  4. app/boot/admin_db.py +88 -0
  5. app/boot/config.py +14 -0
  6. app/boot/create_policy.py +52 -0
  7. app/boot/db.py +70 -0
  8. app/boot/embeddings.py +55 -0
  9. app/boot/home.py +45 -0
  10. app/boot/migrations.py +61 -0
  11. app/boot/read_policy.py +179 -0
  12. app/boot/repos.py +15 -0
  13. app/boot/retrieval.py +3 -0
  14. app/boot/thresholds.py +19 -0
  15. app/boot/update_policy.py +34 -0
  16. app/boot/use_cases.py +22 -0
  17. app/config/__init__.py +1 -0
  18. app/config/defaults/create_policy.yaml +7 -0
  19. app/config/defaults/read_policy.yaml +25 -0
  20. app/config/defaults/runtime.yaml +10 -0
  21. app/config/defaults/thresholds.yaml +3 -0
  22. app/config/defaults/update_policy.yaml +5 -0
  23. app/config/loader.py +58 -0
  24. app/core/__init__.py +1 -0
  25. app/core/contracts/__init__.py +1 -0
  26. app/core/contracts/errors.py +29 -0
  27. app/core/contracts/requests.py +211 -0
  28. app/core/contracts/responses.py +15 -0
  29. app/core/entities/__init__.py +1 -0
  30. app/core/entities/associations.py +58 -0
  31. app/core/entities/episodes.py +66 -0
  32. app/core/entities/evidence.py +29 -0
  33. app/core/entities/facts.py +30 -0
  34. app/core/entities/guidance.py +47 -0
  35. app/core/entities/identity.py +48 -0
  36. app/core/entities/memory.py +34 -0
  37. app/core/entities/runtime_context.py +19 -0
  38. app/core/entities/session_state.py +31 -0
  39. app/core/entities/telemetry.py +152 -0
  40. app/core/entities/utility.py +14 -0
  41. app/core/interfaces/__init__.py +1 -0
  42. app/core/interfaces/clock.py +12 -0
  43. app/core/interfaces/config.py +28 -0
  44. app/core/interfaces/embeddings.py +12 -0
  45. app/core/interfaces/idgen.py +11 -0
  46. app/core/interfaces/repos.py +279 -0
  47. app/core/interfaces/retrieval.py +20 -0
  48. app/core/interfaces/session_state_store.py +33 -0
  49. app/core/interfaces/unit_of_work.py +50 -0
  50. app/core/policies/__init__.py +1 -0
  51. app/core/policies/_shared/__init__.py +1 -0
  52. app/core/policies/_shared/executor.py +132 -0
  53. app/core/policies/_shared/side_effects.py +9 -0
  54. app/core/policies/create_policy/__init__.py +1 -0
  55. app/core/policies/create_policy/pipeline.py +96 -0
  56. app/core/policies/read_policy/__init__.py +1 -0
  57. app/core/policies/read_policy/bm25.py +114 -0
  58. app/core/policies/read_policy/context_pack_builder.py +140 -0
  59. app/core/policies/read_policy/expansion.py +132 -0
  60. app/core/policies/read_policy/fusion_rrf.py +34 -0
  61. app/core/policies/read_policy/lexical_query.py +101 -0
  62. app/core/policies/read_policy/pipeline.py +93 -0
  63. app/core/policies/read_policy/scenario_lift.py +11 -0
  64. app/core/policies/read_policy/scoring.py +61 -0
  65. app/core/policies/read_policy/seed_retrieval.py +54 -0
  66. app/core/policies/read_policy/utility_prior.py +11 -0
  67. app/core/policies/update_policy/__init__.py +1 -0
  68. app/core/policies/update_policy/pipeline.py +80 -0
  69. app/core/use_cases/__init__.py +1 -0
  70. app/core/use_cases/build_guidance.py +85 -0
  71. app/core/use_cases/create_memory.py +26 -0
  72. app/core/use_cases/manage_session_state.py +159 -0
  73. app/core/use_cases/read_memory.py +21 -0
  74. app/core/use_cases/record_episode_sync_telemetry.py +19 -0
  75. app/core/use_cases/record_operation_telemetry.py +32 -0
  76. app/core/use_cases/sync_episode.py +162 -0
  77. app/core/use_cases/update_memory.py +40 -0
  78. app/migrations/__init__.py +1 -0
  79. app/migrations/env.py +65 -0
  80. app/migrations/versions/20260226_0001_initial_schema.py +232 -0
  81. app/migrations/versions/20260312_0002_add_hard_invariants.py +60 -0
  82. app/migrations/versions/20260312_0003_drop_create_confidence.py +40 -0
  83. app/migrations/versions/20260313_0004_episode_sync_hardening.py +71 -0
  84. app/migrations/versions/20260313_0005_evidence_episode_event_refs.py +45 -0
  85. app/migrations/versions/20260318_0006_usage_telemetry_schema.py +175 -0
  86. app/migrations/versions/20260319_0007_identity_session_guidance.py +49 -0
  87. app/migrations/versions/20260320_0008_instance_metadata_and_backup_safety.py +31 -0
  88. app/migrations/versions/__init__.py +1 -0
  89. app/periphery/__init__.py +1 -0
  90. app/periphery/admin/__init__.py +1 -0
  91. app/periphery/admin/backup.py +360 -0
  92. app/periphery/admin/destructive_guard.py +32 -0
  93. app/periphery/admin/doctor.py +192 -0
  94. app/periphery/admin/init.py +996 -0
  95. app/periphery/admin/instance_guard.py +211 -0
  96. app/periphery/admin/machine_state.py +354 -0
  97. app/periphery/admin/privileges.py +42 -0
  98. app/periphery/admin/repo_state.py +266 -0
  99. app/periphery/admin/restore.py +30 -0
  100. app/periphery/cli/__init__.py +1 -0
  101. app/periphery/cli/handlers.py +830 -0
  102. app/periphery/cli/hydration.py +119 -0
  103. app/periphery/cli/main.py +710 -0
  104. app/periphery/cli/presenter_json.py +10 -0
  105. app/periphery/cli/schema_validation.py +201 -0
  106. app/periphery/db/__init__.py +1 -0
  107. app/periphery/db/engine.py +10 -0
  108. app/periphery/db/models/__init__.py +1 -0
  109. app/periphery/db/models/associations.py +55 -0
  110. app/periphery/db/models/episodes.py +55 -0
  111. app/periphery/db/models/evidence.py +19 -0
  112. app/periphery/db/models/experiences.py +33 -0
  113. app/periphery/db/models/instance_metadata.py +17 -0
  114. app/periphery/db/models/memories.py +39 -0
  115. app/periphery/db/models/metadata.py +6 -0
  116. app/periphery/db/models/registry.py +18 -0
  117. app/periphery/db/models/telemetry.py +174 -0
  118. app/periphery/db/models/utility.py +19 -0
  119. app/periphery/db/models/views.py +154 -0
  120. app/periphery/db/repos/__init__.py +1 -0
  121. app/periphery/db/repos/relational/__init__.py +1 -0
  122. app/periphery/db/repos/relational/associations_repo.py +117 -0
  123. app/periphery/db/repos/relational/episodes_repo.py +188 -0
  124. app/periphery/db/repos/relational/evidence_repo.py +82 -0
  125. app/periphery/db/repos/relational/experiences_repo.py +41 -0
  126. app/periphery/db/repos/relational/memories_repo.py +99 -0
  127. app/periphery/db/repos/relational/read_policy_repo.py +202 -0
  128. app/periphery/db/repos/relational/telemetry_repo.py +161 -0
  129. app/periphery/db/repos/relational/utility_repo.py +30 -0
  130. app/periphery/db/repos/semantic/__init__.py +1 -0
  131. app/periphery/db/repos/semantic/keyword_retrieval_repo.py +63 -0
  132. app/periphery/db/repos/semantic/semantic_retrieval_repo.py +111 -0
  133. app/periphery/db/session.py +10 -0
  134. app/periphery/db/uow.py +75 -0
  135. app/periphery/embeddings/__init__.py +1 -0
  136. app/periphery/embeddings/local_provider.py +35 -0
  137. app/periphery/embeddings/query_vector_search.py +18 -0
  138. app/periphery/episodes/__init__.py +1 -0
  139. app/periphery/episodes/claude_code.py +387 -0
  140. app/periphery/episodes/codex.py +423 -0
  141. app/periphery/episodes/launcher.py +66 -0
  142. app/periphery/episodes/normalization.py +31 -0
  143. app/periphery/episodes/poller.py +299 -0
  144. app/periphery/episodes/source_discovery.py +66 -0
  145. app/periphery/episodes/tool_filter.py +165 -0
  146. app/periphery/identity/__init__.py +1 -0
  147. app/periphery/identity/claude_hook_install.py +67 -0
  148. app/periphery/identity/claude_runtime.py +83 -0
  149. app/periphery/identity/codex_runtime.py +32 -0
  150. app/periphery/identity/compatibility.py +38 -0
  151. app/periphery/identity/resolver.py +163 -0
  152. app/periphery/session_state/__init__.py +1 -0
  153. app/periphery/session_state/file_store.py +100 -0
  154. app/periphery/telemetry/__init__.py +33 -0
  155. app/periphery/telemetry/operation_summary.py +299 -0
  156. app/periphery/telemetry/session_selection.py +156 -0
  157. app/periphery/telemetry/sync_summary.py +65 -0
  158. app/periphery/validation/__init__.py +1 -0
  159. app/periphery/validation/integrity_validation.py +253 -0
  160. app/periphery/validation/semantic_validation.py +94 -0
  161. shellbrain-0.1.0.dist-info/METADATA +130 -0
  162. shellbrain-0.1.0.dist-info/RECORD +165 -0
  163. shellbrain-0.1.0.dist-info/WHEEL +5 -0
  164. shellbrain-0.1.0.dist-info/entry_points.txt +2 -0
  165. shellbrain-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,33 @@
1
+ """Storage interface for repo-local per-caller working state."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from collections.abc import Sequence
7
+ from pathlib import Path
8
+
9
+ from app.core.entities.session_state import SessionState
10
+
11
+
12
class ISessionStateStore(ABC):
    """Persistence contract for per-caller session state kept for one repo.

    Every operation takes keyword-only arguments and is scoped by ``repo_root``.
    """

    # NOTE(review): the ``list`` method below shares its name with the builtin, so
    # static checkers may resolve the ``list[str]`` annotation on ``gc`` to the
    # method instead of the builtin type -- confirm with the project's type checker.

    @abstractmethod
    def load(self, *, repo_root: Path, caller_id: str) -> SessionState | None:
        """Return the state stored for ``caller_id``, or ``None`` when there is none."""
        ...

    @abstractmethod
    def save(self, *, repo_root: Path, state: SessionState) -> None:
        """Write ``state`` to the store for the given repo root."""
        ...

    @abstractmethod
    def delete(self, *, repo_root: Path, caller_id: str) -> None:
        """Remove the state of ``caller_id``; a missing state is not an error."""
        ...

    @abstractmethod
    def list(self, *, repo_root: Path) -> Sequence[SessionState]:
        """Return every caller state recorded under ``repo_root``."""
        ...

    @abstractmethod
    def gc(self, *, repo_root: Path, older_than_iso: str) -> list[str]:
        """Drop caller states last seen before the cutoff and return their caller ids.

        ``older_than_iso`` is the cutoff as a string (presumably an ISO-8601
        timestamp, judging by the name -- confirm against implementations).
        """
        ...
@@ -0,0 +1,50 @@
1
+ """This module defines the unit-of-work interface used to enforce transaction boundaries."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Self
5
+
6
+ from app.core.interfaces.repos import (
7
+ IAssociationsRepo,
8
+ IEpisodesRepo,
9
+ IEvidenceRepo,
10
+ IExperiencesRepo,
11
+ IKeywordRetrievalRepo,
12
+ IMemoriesRepo,
13
+ IReadPolicyRepo,
14
+ ISemanticRetrievalRepo,
15
+ ITelemetryRepo,
16
+ IUtilityRepo,
17
+ )
18
+ from app.core.interfaces.retrieval import IVectorSearch
19
+
20
+
21
+ class IUnitOfWork(ABC):
22
+ """This interface defines transactional access to all repositories."""
23
+
24
+ memories: IMemoriesRepo
25
+ experiences: IExperiencesRepo
26
+ associations: IAssociationsRepo
27
+ utility: IUtilityRepo
28
+ episodes: IEpisodesRepo
29
+ evidence: IEvidenceRepo
30
+ semantic_retrieval: ISemanticRetrievalRepo
31
+ keyword_retrieval: IKeywordRetrievalRepo
32
+ read_policy: IReadPolicyRepo
33
+ telemetry: ITelemetryRepo
34
+ vector_search: IVectorSearch | None
35
+
36
+ @abstractmethod
37
+ def __enter__(self) -> Self:
38
+ """This method opens a transaction scope and returns itself."""
39
+
40
+ @abstractmethod
41
+ def __exit__(self, exc_type, exc_val, exc_tb) -> None:
42
+ """This method exits the transaction scope with commit-or-rollback behavior."""
43
+
44
+ @abstractmethod
45
+ def commit(self) -> None:
46
+ """This method commits the current transaction."""
47
+
48
+ @abstractmethod
49
+ def rollback(self) -> None:
50
+ """This method rolls back the current transaction."""
@@ -0,0 +1 @@
1
+ """This package defines core create, read, and update policy packages."""
@@ -0,0 +1 @@
1
+ """This package defines minimal shared internals for create and update policies."""
@@ -0,0 +1,132 @@
1
+ """This module defines shared side-effect execution for create and update policies."""
2
+
3
+ from app.core.entities.associations import (
4
+ AssociationEdge,
5
+ AssociationObservation,
6
+ AssociationRelationType,
7
+ AssociationSourceMode,
8
+ AssociationState,
9
+ )
10
+ from app.core.entities.facts import FactUpdate, ProblemAttempt, ProblemAttemptRole
11
+ from app.core.entities.memory import Memory, MemoryKind, MemoryScope
12
+ from app.core.entities.utility import UtilityObservation
13
+ from app.core.interfaces.embeddings import IEmbeddingProvider
14
+ from app.core.interfaces.unit_of_work import IUnitOfWork
15
+
16
+
17
+ def apply_side_effects(
18
+ plan: list[dict[str, object]],
19
+ uow: IUnitOfWork,
20
+ *,
21
+ embedding_provider: IEmbeddingProvider | None = None,
22
+ ) -> None:
23
+ """This function executes a deterministic side-effect plan inside one transaction."""
24
+
25
+ for effect in plan:
26
+ effect_type = str(effect["effect_type"])
27
+ params = effect["params"]
28
+ assert isinstance(params, dict)
29
+
30
+ if effect_type == "memory.create":
31
+ uow.memories.create(
32
+ Memory(
33
+ id=str(params["memory_id"]),
34
+ repo_id=str(params["repo_id"]),
35
+ scope=MemoryScope(str(params["scope"])),
36
+ kind=MemoryKind(str(params["kind"])),
37
+ text=str(params["text"]),
38
+ )
39
+ )
40
+ continue
41
+
42
+ if effect_type == "memory_embedding.upsert":
43
+ if embedding_provider is None:
44
+ raise RuntimeError("Embedding provider is required for memory_embedding.upsert")
45
+ uow.memories.upsert_embedding(
46
+ memory_id=str(params["memory_id"]),
47
+ model=str(params["model"]),
48
+ vector=embedding_provider.embed(str(params["text"])),
49
+ )
50
+ continue
51
+
52
+ if effect_type == "memory_evidence.attach":
53
+ refs = params["refs"]
54
+ assert isinstance(refs, list)
55
+ for ref in refs:
56
+ evidence = uow.evidence.upsert_ref(repo_id=str(params["repo_id"]), ref=str(ref))
57
+ uow.evidence.link_memory_evidence(memory_id=str(params["memory_id"]), evidence_id=evidence.id)
58
+ continue
59
+
60
+ if effect_type == "problem_attempt.create":
61
+ uow.experiences.create_problem_attempt(
62
+ ProblemAttempt(
63
+ problem_id=str(params["problem_id"]),
64
+ attempt_id=str(params["attempt_id"]),
65
+ role=ProblemAttemptRole(str(params["role"])),
66
+ )
67
+ )
68
+ continue
69
+
70
+ if effect_type == "memory.archive_state":
71
+ updated = uow.memories.set_archived(memory_id=str(params["memory_id"]), archived=bool(params["archived"]))
72
+ if not updated:
73
+ raise LookupError(f"Target shellbrain not found for archive update: {params['memory_id']}")
74
+ continue
75
+
76
+ if effect_type == "utility_observation.append":
77
+ uow.utility.append_observation(
78
+ UtilityObservation(
79
+ id=str(params["id"]),
80
+ memory_id=str(params["memory_id"]),
81
+ problem_id=str(params["problem_id"]),
82
+ vote=float(params["vote"]),
83
+ rationale=str(params["rationale"]) if params.get("rationale") is not None else None,
84
+ )
85
+ )
86
+ continue
87
+
88
+ if effect_type == "fact_update.create":
89
+ uow.experiences.create_fact_update(
90
+ FactUpdate(
91
+ id=str(params["id"]),
92
+ old_fact_id=str(params["old_fact_id"]),
93
+ change_id=str(params["change_id"]),
94
+ new_fact_id=str(params["new_fact_id"]),
95
+ )
96
+ )
97
+ continue
98
+
99
+ if effect_type == "association.upsert_and_observe":
100
+ edge = uow.associations.upsert_edge(
101
+ AssociationEdge(
102
+ id=str(params["edge_id"]),
103
+ repo_id=str(params["repo_id"]),
104
+ from_memory_id=str(params["from_memory_id"]),
105
+ to_memory_id=str(params["to_memory_id"]),
106
+ relation_type=AssociationRelationType(str(params["relation_type"])),
107
+ source_mode=AssociationSourceMode(str(params["source_mode"])),
108
+ state=AssociationState(str(params["state"])),
109
+ strength=float(params["strength"]),
110
+ )
111
+ )
112
+ uow.associations.append_observation(
113
+ AssociationObservation(
114
+ id=str(params["observation_id"]),
115
+ repo_id=str(params["repo_id"]),
116
+ edge_id=edge.id,
117
+ from_memory_id=str(params["from_memory_id"]),
118
+ to_memory_id=str(params["to_memory_id"]),
119
+ relation_type=AssociationRelationType(str(params["relation_type"])),
120
+ source=str(params["observation_source"]),
121
+ valence=float(params["valence"]),
122
+ salience=float(params["salience"]),
123
+ )
124
+ )
125
+ evidence_refs = params.get("evidence_refs", [])
126
+ assert isinstance(evidence_refs, list)
127
+ for ref in evidence_refs:
128
+ evidence = uow.evidence.upsert_ref(repo_id=str(params["repo_id"]), ref=str(ref))
129
+ uow.evidence.link_association_edge_evidence(edge_id=edge.id, evidence_id=evidence.id)
130
+ continue
131
+
132
+ raise ValueError(f"Unsupported side effect type: {effect_type}")
@@ -0,0 +1,9 @@
1
+ """This module defines shared side-effect descriptor helpers."""
2
+
3
+ from typing import Any
4
+
5
+
6
def make_side_effect(effect_type: str, params: dict[str, Any]) -> dict[str, Any]:
    """Bundle an effect type and its parameters into one descriptor mapping.

    The returned dict has exactly the keys ``"effect_type"`` and ``"params"``;
    ``params`` is stored by reference, not copied.
    """

    descriptor: dict[str, Any] = {}
    descriptor["effect_type"] = effect_type
    descriptor["params"] = params
    return descriptor
@@ -0,0 +1 @@
1
+ """This package defines deterministic create-policy planning and execution."""
@@ -0,0 +1,96 @@
1
+ """This module defines create-policy planning and execution helpers."""
2
+
3
+ from uuid import uuid4
4
+ from typing import Any
5
+
6
+ from app.core.entities.associations import AssociationSourceMode, AssociationState
7
+ from app.core.entities.memory import MemoryKind
8
+ from app.core.interfaces.embeddings import IEmbeddingProvider
9
+ from app.core.interfaces.unit_of_work import IUnitOfWork
10
+ from app.core.policies._shared.executor import apply_side_effects
11
+ from app.core.policies._shared.side_effects import make_side_effect
12
+
13
+
14
def build_create_plan(payload: dict[str, Any], *, embedding_model: str = "unknown") -> list[dict[str, Any]]:
    """Translate a validated create payload into an ordered list of side effects.

    The plan always starts with ``memory.create``, ``memory_embedding.upsert``
    and ``memory_evidence.attach``. A ``problem_attempt.create`` effect follows
    when the memory kind is a solution or failed tactic and a ``problem_id`` link
    is present. One ``association.upsert_and_observe`` effect is appended per
    linked association.

    Edge and observation ids are generated with ``uuid4``, so two calls with the
    same payload produce plans that differ in those ids.

    Args:
        payload: Mapping with ``memory``, ``repo_id`` and ``memory_id`` entries.
            ``memory`` must provide ``scope``, ``kind``, ``text`` and
            ``evidence_refs``; ``links`` is optional.
        embedding_model: Model name recorded on the embedding upsert effect.

    Returns:
        The side-effect descriptors in execution order.

    Raises:
        KeyError: A required payload or memory key is missing.
    """

    memory = payload["memory"]
    repo_id = payload["repo_id"]
    memory_id = payload["memory_id"]
    plan: list[dict[str, Any]] = [
        make_side_effect(
            "memory.create",
            {
                "memory_id": memory_id,
                "repo_id": repo_id,
                "scope": memory["scope"],
                "kind": memory["kind"],
                "text": memory["text"],
            },
        ),
        make_side_effect(
            "memory_embedding.upsert",
            {
                "memory_id": memory_id,
                "model": embedding_model,
                "text": memory["text"],
            },
        ),
        make_side_effect(
            "memory_evidence.attach",
            {
                "memory_id": memory_id,
                "repo_id": repo_id,
                "refs": list(memory["evidence_refs"]),
            },
        ),
    ]

    # ``links`` and its ``associations`` entry may each be missing or explicitly
    # ``None``; normalize both so the loop below never iterates over ``None``.
    links = memory.get("links") or {}
    associations = links.get("associations") or []

    problem_id = links.get("problem_id")
    attempt_kinds = {MemoryKind.SOLUTION.value, MemoryKind.FAILED_TACTIC.value}
    if memory["kind"] in attempt_kinds and problem_id:
        plan.append(
            make_side_effect(
                "problem_attempt.create",
                {
                    "problem_id": problem_id,
                    "attempt_id": memory_id,
                    "role": memory["kind"],
                },
            )
        )

    for association in associations:
        confidence = association.get("confidence")
        salience = association.get("salience")
        # Confidence seeds both edge strength and observation valence; 0.5 is the
        # fallback when the caller supplied no value.
        resolved_confidence = confidence if confidence is not None else 0.5
        plan.append(
            make_side_effect(
                "association.upsert_and_observe",
                {
                    "repo_id": repo_id,
                    "edge_id": str(uuid4()),
                    "from_memory_id": memory_id,
                    "to_memory_id": association["to_memory_id"],
                    "relation_type": association["relation_type"],
                    "source_mode": AssociationSourceMode.AGENT.value,
                    "state": AssociationState.TENTATIVE.value,
                    "strength": resolved_confidence,
                    "observation_id": str(uuid4()),
                    "observation_source": "agent_explicit",
                    "valence": resolved_confidence,
                    "salience": salience if salience is not None else 0.5,
                    # Each effect gets its own copy so later mutation cannot leak.
                    "evidence_refs": list(memory["evidence_refs"]),
                },
            )
        )
    return plan
86
+
87
+
88
def apply_create_plan(
    plan: list[dict[str, Any]],
    uow: IUnitOfWork,
    *,
    embedding_provider: IEmbeddingProvider,
) -> None:
    """Run a create plan through the shared side-effect executor.

    This is a thin wrapper over ``apply_side_effects``; unlike the executor, it
    requires an embedding provider.
    """

    apply_side_effects(plan, uow, embedding_provider=embedding_provider)
    return None
@@ -0,0 +1 @@
1
+ """This package defines read-policy retrieval and context-pack assembly stages."""
@@ -0,0 +1,114 @@
1
+ """Helpers for scoring lexical candidates with Okapi BM25 and coverage-aware admission."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections import Counter
6
+ from dataclasses import dataclass
7
+ from math import log
8
+ from typing import Literal, Sequence
9
+
10
+
11
+ _K1 = 1.2
12
+ _B = 0.75
13
+ _COVERAGE_THRESHOLD_BY_MODE = {
14
+ "targeted": 0.65,
15
+ "ambient": 0.80,
16
+ }
17
+
18
+
19
+ @dataclass(frozen=True, slots=True)
20
+ class BM25Document:
21
+ """Normalized document representation used for BM25 scoring."""
22
+
23
+ memory_id: str
24
+ terms: tuple[str, ...]
25
+
26
+
27
+ @dataclass(frozen=True, slots=True)
28
+ class BM25ScoredDocument:
29
+ """Scored lexical candidate with query-coverage metadata."""
30
+
31
+ memory_id: str
32
+ score: float
33
+ coverage: float
34
+
35
+
36
+ def score_documents(
37
+ query_terms: Sequence[str],
38
+ documents: Sequence[BM25Document],
39
+ *,
40
+ k1: float = _K1,
41
+ b: float = _B,
42
+ ) -> list[BM25ScoredDocument]:
43
+ """Return BM25 scores and weighted query coverage for partially matching documents."""
44
+
45
+ normalized_query_terms = tuple(query_terms)
46
+ if not normalized_query_terms or not documents:
47
+ return []
48
+
49
+ term_frequencies = {document.memory_id: Counter(document.terms) for document in documents}
50
+ corpus_size = len(documents)
51
+ average_length = sum(len(document.terms) for document in documents) / corpus_size
52
+ if average_length <= 0.0:
53
+ return []
54
+
55
+ inverse_document_frequencies = {
56
+ term: _inverse_document_frequency(
57
+ sum(1 for document in documents if term in term_frequencies[document.memory_id]),
58
+ corpus_size,
59
+ )
60
+ for term in normalized_query_terms
61
+ }
62
+ total_query_weight = sum(inverse_document_frequencies.values())
63
+ if total_query_weight <= 0.0:
64
+ return []
65
+
66
+ scored: list[BM25ScoredDocument] = []
67
+ for document in documents:
68
+ frequencies = term_frequencies[document.memory_id]
69
+ matched_terms = tuple(term for term in normalized_query_terms if term in frequencies)
70
+ if not matched_terms:
71
+ continue
72
+
73
+ score = 0.0
74
+ matched_query_weight = 0.0
75
+ document_length = len(document.terms)
76
+ normalization = k1 * (1 - b + b * document_length / average_length)
77
+ for term in matched_terms:
78
+ inverse_document_frequency = inverse_document_frequencies[term]
79
+ term_frequency = frequencies[term]
80
+ score += inverse_document_frequency * ((term_frequency * (k1 + 1)) / (term_frequency + normalization))
81
+ matched_query_weight += inverse_document_frequency
82
+
83
+ if score > 0.0:
84
+ scored.append(
85
+ BM25ScoredDocument(
86
+ memory_id=document.memory_id,
87
+ score=score,
88
+ coverage=matched_query_weight / total_query_weight,
89
+ )
90
+ )
91
+
92
+ return sorted(scored, key=lambda item: (-item.score, item.memory_id))
93
+
94
+
95
+ def admit_scored_documents(
96
+ scored_documents: Sequence[BM25ScoredDocument],
97
+ *,
98
+ mode: Literal["ambient", "targeted"],
99
+ ) -> list[dict[str, object]]:
100
+ """Apply the mode-aware weighted query-coverage gate to scored lexical candidates."""
101
+
102
+ threshold = _COVERAGE_THRESHOLD_BY_MODE[mode]
103
+ admitted = [
104
+ {"memory_id": document.memory_id, "score": document.score}
105
+ for document in scored_documents
106
+ if document.coverage >= threshold
107
+ ]
108
+ return sorted(admitted, key=lambda item: (-float(item["score"]), str(item["memory_id"])))
109
+
110
+
111
+ def _inverse_document_frequency(document_frequency: int, corpus_size: int) -> float:
112
+ """Return the BM25 IDF weight for one query term in the current visible corpus."""
113
+
114
+ return log(1 + (corpus_size - document_frequency + 0.5) / (document_frequency + 0.5))
@@ -0,0 +1,140 @@
1
+ """This module defines bounded context-pack assembly with quotas, dedupe, and hard caps."""
2
+
3
+ from typing import Any
4
+
5
+ from app.boot.read_policy import resolve_read_limit, resolve_read_quotas
6
+
7
+
8
+ _BUCKET_ORDER = ("direct", "explicit", "implicit")
9
+ _SECTION_NAMES = {
10
+ "direct": "direct",
11
+ "explicit": "explicit_related",
12
+ "implicit": "implicit_related",
13
+ }
14
+ _BUCKET_PRIORITY = {bucket_name: index for index, bucket_name in enumerate(_BUCKET_ORDER)}
15
+
16
+
17
def assemble_context_pack(scored_candidates: dict[str, list[dict[str, Any]]], payload: dict[str, Any]) -> dict[str, Any]:
    """Build the final context pack from per-bucket scored candidates.

    Selection happens in two passes. First, each bucket (in ``_BUCKET_ORDER``)
    takes its best candidates up to its quota and the remaining overall limit,
    skipping memory ids that were already selected. Candidates over quota go to
    a spill pool. Second, if capacity remains, the spill pool is drained by
    descending score, then bucket priority, then memory id.

    Returns:
        A mapping with a ``meta`` entry (mode, query, limit, per-section counts)
        plus the ``direct``, ``explicit_related`` and ``implicit_related`` lists.
    """

    mode = str(payload["mode"])
    limit = resolve_read_limit(mode=mode, explicit_limit=payload.get("limit"))
    quotas = resolve_read_quotas(mode=mode)

    def _rank(entry: dict[str, Any]) -> tuple[float, str]:
        # Best score first; memory id makes the order stable across runs.
        return (-float(entry["score"]), str(entry["memory_id"]))

    ranked = {
        bucket: sorted(scored_candidates.get(bucket, []), key=_rank)
        for bucket in _BUCKET_ORDER
    }

    picked: dict[str, list[dict[str, Any]]] = {bucket: [] for bucket in _BUCKET_ORDER}
    taken_ids: set[str] = set()
    overflow: list[tuple[str, dict[str, Any]]] = []
    budget = limit

    # Pass 1: quota-bounded selection per bucket.
    for bucket in _BUCKET_ORDER:
        quota = min(int(quotas.get(bucket, 0)), budget)
        used = 0
        for entry in ranked[bucket]:
            entry_id = str(entry["memory_id"])
            if entry_id in taken_ids:
                continue
            if used >= quota:
                overflow.append((bucket, entry))
                continue
            taken_ids.add(entry_id)
            picked[bucket].append(entry)
            used += 1
            budget -= 1

    # Pass 2: hand leftover capacity to the strongest spilled candidates.
    if budget > 0:
        overflow.sort(
            key=lambda pair: (
                -float(pair[1]["score"]),
                _BUCKET_PRIORITY[pair[0]],
                str(pair[1]["memory_id"]),
            )
        )
        for bucket, entry in overflow:
            if budget <= 0:
                break
            entry_id = str(entry["memory_id"])
            if entry_id in taken_ids:
                continue
            taken_ids.add(entry_id)
            picked[bucket].append(entry)
            budget -= 1

    direct_items = [_shape_item(entry, "direct") for entry in picked["direct"]]
    explicit_items = [_shape_item(entry, "explicit") for entry in picked["explicit"]]
    implicit_items = [_shape_item(entry, "implicit") for entry in picked["implicit"]]
    sections = {
        "direct": direct_items,
        "explicit_related": explicit_items,
        "implicit_related": implicit_items,
    }
    _assign_priorities(sections)

    meta = {
        "mode": mode,
        "query": payload.get("query"),
        "limit": limit,
        "counts": {
            "direct": len(direct_items),
            "explicit_related": len(explicit_items),
            "implicit_related": len(implicit_items),
        },
    }
    return {
        "meta": meta,
        "direct": direct_items,
        "explicit_related": explicit_items,
        "implicit_related": implicit_items,
    }
90
+
91
+
92
def _shape_item(candidate: dict[str, Any], bucket_name: str) -> dict[str, Any]:
    """Reduce an internal candidate to the compact item emitted in a context pack.

    ``memory_id`` and ``why_included`` are always present. ``kind``, ``text`` and
    ``relation_type`` are copied when the candidate has them, and
    ``anchor_memory_id`` only for buckets other than ``direct``.
    """

    shaped: dict[str, Any] = {
        "memory_id": str(candidate["memory_id"]),
        "why_included": _resolve_why_included(candidate, bucket_name),
    }
    if "kind" in candidate:
        shaped["kind"] = _normalize_kind(candidate["kind"])

    is_related = bucket_name != "direct"
    # (field, allowed) pairs, listed in the key order of the emitted item.
    optional_text_fields = (
        ("text", True),
        ("anchor_memory_id", is_related),
        ("relation_type", True),
    )
    for field_name, allowed in optional_text_fields:
        if allowed and field_name in candidate:
            shaped[field_name] = str(candidate[field_name])
    return shaped
108
+
109
+
110
+ def _resolve_why_included(candidate: dict[str, Any], bucket_name: str) -> str:
111
+ """Resolve a stable user-facing inclusion reason from candidate metadata."""
112
+
113
+ if "why_included" in candidate:
114
+ return str(candidate["why_included"])
115
+ if bucket_name == "direct":
116
+ return "direct_match"
117
+ if bucket_name == "implicit":
118
+ return "semantic_neighbor"
119
+ expansion_type = str(candidate.get("expansion_type", ""))
120
+ return {
121
+ "problem_attempt": "problem_attempt",
122
+ "fact_update": "fact_update",
123
+ "association": "association_link",
124
+ }.get(expansion_type, expansion_type or "related_memory")
125
+
126
+
127
+ def _normalize_kind(kind: Any) -> str:
128
+ """Normalize shellbrain kind values from entities or strings into JSON-safe text."""
129
+
130
+ return str(getattr(kind, "value", kind))
131
+
132
+
133
+ def _assign_priorities(sections: dict[str, list[dict[str, Any]]]) -> None:
134
+ """Assign one global priority order across displayed sections."""
135
+
136
+ priority = 1
137
+ for section_name in ("direct", "explicit_related", "implicit_related"):
138
+ for item in sections[section_name]:
139
+ item["priority"] = priority
140
+ priority += 1