shellbrain 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. app/__init__.py +1 -0
  2. app/__main__.py +7 -0
  3. app/boot/__init__.py +1 -0
  4. app/boot/admin_db.py +88 -0
  5. app/boot/config.py +14 -0
  6. app/boot/create_policy.py +52 -0
  7. app/boot/db.py +70 -0
  8. app/boot/embeddings.py +55 -0
  9. app/boot/home.py +45 -0
  10. app/boot/migrations.py +61 -0
  11. app/boot/read_policy.py +179 -0
  12. app/boot/repos.py +15 -0
  13. app/boot/retrieval.py +3 -0
  14. app/boot/thresholds.py +19 -0
  15. app/boot/update_policy.py +34 -0
  16. app/boot/use_cases.py +22 -0
  17. app/config/__init__.py +1 -0
  18. app/config/defaults/create_policy.yaml +7 -0
  19. app/config/defaults/read_policy.yaml +25 -0
  20. app/config/defaults/runtime.yaml +10 -0
  21. app/config/defaults/thresholds.yaml +3 -0
  22. app/config/defaults/update_policy.yaml +5 -0
  23. app/config/loader.py +58 -0
  24. app/core/__init__.py +1 -0
  25. app/core/contracts/__init__.py +1 -0
  26. app/core/contracts/errors.py +29 -0
  27. app/core/contracts/requests.py +211 -0
  28. app/core/contracts/responses.py +15 -0
  29. app/core/entities/__init__.py +1 -0
  30. app/core/entities/associations.py +58 -0
  31. app/core/entities/episodes.py +66 -0
  32. app/core/entities/evidence.py +29 -0
  33. app/core/entities/facts.py +30 -0
  34. app/core/entities/guidance.py +47 -0
  35. app/core/entities/identity.py +48 -0
  36. app/core/entities/memory.py +34 -0
  37. app/core/entities/runtime_context.py +19 -0
  38. app/core/entities/session_state.py +31 -0
  39. app/core/entities/telemetry.py +152 -0
  40. app/core/entities/utility.py +14 -0
  41. app/core/interfaces/__init__.py +1 -0
  42. app/core/interfaces/clock.py +12 -0
  43. app/core/interfaces/config.py +28 -0
  44. app/core/interfaces/embeddings.py +12 -0
  45. app/core/interfaces/idgen.py +11 -0
  46. app/core/interfaces/repos.py +279 -0
  47. app/core/interfaces/retrieval.py +20 -0
  48. app/core/interfaces/session_state_store.py +33 -0
  49. app/core/interfaces/unit_of_work.py +50 -0
  50. app/core/policies/__init__.py +1 -0
  51. app/core/policies/_shared/__init__.py +1 -0
  52. app/core/policies/_shared/executor.py +132 -0
  53. app/core/policies/_shared/side_effects.py +9 -0
  54. app/core/policies/create_policy/__init__.py +1 -0
  55. app/core/policies/create_policy/pipeline.py +96 -0
  56. app/core/policies/read_policy/__init__.py +1 -0
  57. app/core/policies/read_policy/bm25.py +114 -0
  58. app/core/policies/read_policy/context_pack_builder.py +140 -0
  59. app/core/policies/read_policy/expansion.py +132 -0
  60. app/core/policies/read_policy/fusion_rrf.py +34 -0
  61. app/core/policies/read_policy/lexical_query.py +101 -0
  62. app/core/policies/read_policy/pipeline.py +93 -0
  63. app/core/policies/read_policy/scenario_lift.py +11 -0
  64. app/core/policies/read_policy/scoring.py +61 -0
  65. app/core/policies/read_policy/seed_retrieval.py +54 -0
  66. app/core/policies/read_policy/utility_prior.py +11 -0
  67. app/core/policies/update_policy/__init__.py +1 -0
  68. app/core/policies/update_policy/pipeline.py +80 -0
  69. app/core/use_cases/__init__.py +1 -0
  70. app/core/use_cases/build_guidance.py +85 -0
  71. app/core/use_cases/create_memory.py +26 -0
  72. app/core/use_cases/manage_session_state.py +159 -0
  73. app/core/use_cases/read_memory.py +21 -0
  74. app/core/use_cases/record_episode_sync_telemetry.py +19 -0
  75. app/core/use_cases/record_operation_telemetry.py +32 -0
  76. app/core/use_cases/sync_episode.py +162 -0
  77. app/core/use_cases/update_memory.py +40 -0
  78. app/migrations/__init__.py +1 -0
  79. app/migrations/env.py +65 -0
  80. app/migrations/versions/20260226_0001_initial_schema.py +232 -0
  81. app/migrations/versions/20260312_0002_add_hard_invariants.py +60 -0
  82. app/migrations/versions/20260312_0003_drop_create_confidence.py +40 -0
  83. app/migrations/versions/20260313_0004_episode_sync_hardening.py +71 -0
  84. app/migrations/versions/20260313_0005_evidence_episode_event_refs.py +45 -0
  85. app/migrations/versions/20260318_0006_usage_telemetry_schema.py +175 -0
  86. app/migrations/versions/20260319_0007_identity_session_guidance.py +49 -0
  87. app/migrations/versions/20260320_0008_instance_metadata_and_backup_safety.py +31 -0
  88. app/migrations/versions/__init__.py +1 -0
  89. app/periphery/__init__.py +1 -0
  90. app/periphery/admin/__init__.py +1 -0
  91. app/periphery/admin/backup.py +360 -0
  92. app/periphery/admin/destructive_guard.py +32 -0
  93. app/periphery/admin/doctor.py +192 -0
  94. app/periphery/admin/init.py +996 -0
  95. app/periphery/admin/instance_guard.py +211 -0
  96. app/periphery/admin/machine_state.py +354 -0
  97. app/periphery/admin/privileges.py +42 -0
  98. app/periphery/admin/repo_state.py +266 -0
  99. app/periphery/admin/restore.py +30 -0
  100. app/periphery/cli/__init__.py +1 -0
  101. app/periphery/cli/handlers.py +830 -0
  102. app/periphery/cli/hydration.py +119 -0
  103. app/periphery/cli/main.py +710 -0
  104. app/periphery/cli/presenter_json.py +10 -0
  105. app/periphery/cli/schema_validation.py +201 -0
  106. app/periphery/db/__init__.py +1 -0
  107. app/periphery/db/engine.py +10 -0
  108. app/periphery/db/models/__init__.py +1 -0
  109. app/periphery/db/models/associations.py +55 -0
  110. app/periphery/db/models/episodes.py +55 -0
  111. app/periphery/db/models/evidence.py +19 -0
  112. app/periphery/db/models/experiences.py +33 -0
  113. app/periphery/db/models/instance_metadata.py +17 -0
  114. app/periphery/db/models/memories.py +39 -0
  115. app/periphery/db/models/metadata.py +6 -0
  116. app/periphery/db/models/registry.py +18 -0
  117. app/periphery/db/models/telemetry.py +174 -0
  118. app/periphery/db/models/utility.py +19 -0
  119. app/periphery/db/models/views.py +154 -0
  120. app/periphery/db/repos/__init__.py +1 -0
  121. app/periphery/db/repos/relational/__init__.py +1 -0
  122. app/periphery/db/repos/relational/associations_repo.py +117 -0
  123. app/periphery/db/repos/relational/episodes_repo.py +188 -0
  124. app/periphery/db/repos/relational/evidence_repo.py +82 -0
  125. app/periphery/db/repos/relational/experiences_repo.py +41 -0
  126. app/periphery/db/repos/relational/memories_repo.py +99 -0
  127. app/periphery/db/repos/relational/read_policy_repo.py +202 -0
  128. app/periphery/db/repos/relational/telemetry_repo.py +161 -0
  129. app/periphery/db/repos/relational/utility_repo.py +30 -0
  130. app/periphery/db/repos/semantic/__init__.py +1 -0
  131. app/periphery/db/repos/semantic/keyword_retrieval_repo.py +63 -0
  132. app/periphery/db/repos/semantic/semantic_retrieval_repo.py +111 -0
  133. app/periphery/db/session.py +10 -0
  134. app/periphery/db/uow.py +75 -0
  135. app/periphery/embeddings/__init__.py +1 -0
  136. app/periphery/embeddings/local_provider.py +35 -0
  137. app/periphery/embeddings/query_vector_search.py +18 -0
  138. app/periphery/episodes/__init__.py +1 -0
  139. app/periphery/episodes/claude_code.py +387 -0
  140. app/periphery/episodes/codex.py +423 -0
  141. app/periphery/episodes/launcher.py +66 -0
  142. app/periphery/episodes/normalization.py +31 -0
  143. app/periphery/episodes/poller.py +299 -0
  144. app/periphery/episodes/source_discovery.py +66 -0
  145. app/periphery/episodes/tool_filter.py +165 -0
  146. app/periphery/identity/__init__.py +1 -0
  147. app/periphery/identity/claude_hook_install.py +67 -0
  148. app/periphery/identity/claude_runtime.py +83 -0
  149. app/periphery/identity/codex_runtime.py +32 -0
  150. app/periphery/identity/compatibility.py +38 -0
  151. app/periphery/identity/resolver.py +163 -0
  152. app/periphery/session_state/__init__.py +1 -0
  153. app/periphery/session_state/file_store.py +100 -0
  154. app/periphery/telemetry/__init__.py +33 -0
  155. app/periphery/telemetry/operation_summary.py +299 -0
  156. app/periphery/telemetry/session_selection.py +156 -0
  157. app/periphery/telemetry/sync_summary.py +65 -0
  158. app/periphery/validation/__init__.py +1 -0
  159. app/periphery/validation/integrity_validation.py +253 -0
  160. app/periphery/validation/semantic_validation.py +94 -0
  161. shellbrain-0.1.0.dist-info/METADATA +130 -0
  162. shellbrain-0.1.0.dist-info/RECORD +165 -0
  163. shellbrain-0.1.0.dist-info/WHEEL +5 -0
  164. shellbrain-0.1.0.dist-info/entry_points.txt +2 -0
  165. shellbrain-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,99 @@
1
+ """This module defines relational repository operations for shellbrain aggregates."""
2
+
3
+ from datetime import datetime, timezone
4
+ from typing import Sequence
5
+
6
+ from sqlalchemy import select, update
7
+ from sqlalchemy.dialects.postgresql import insert
8
+
9
+ from app.core.entities.memory import Memory, MemoryKind, MemoryScope
10
+ from app.core.interfaces.repos import IMemoriesRepo
11
+ from app.periphery.db.models.memories import memories, memory_embeddings
12
+
13
+
14
class MemoriesRepo(IMemoriesRepo):
    """Relational repository for shellbrain memory rows and their embeddings."""

    def __init__(self, session) -> None:
        """Keep a reference to the DB session that every statement is executed on."""
        self._session = session

    def create(self, memory: Memory) -> None:
        """Insert one memory row, stamping ``created_at`` with the current UTC time."""
        new_row = {
            "id": memory.id,
            "repo_id": memory.repo_id,
            # Enum members are stored by their underlying ``.value``.
            "scope": memory.scope.value,
            "kind": memory.kind.value,
            "text": memory.text,
            "created_at": datetime.now(timezone.utc),
            "archived": memory.archived,
        }
        self._session.execute(memories.insert().values(**new_row))

    def get(self, memory_id: str) -> Memory | None:
        """Fetch a single memory by identifier, or ``None`` when no row matches."""
        lookup = select(memories).where(memories.c.id == memory_id)
        found = self._session.execute(lookup).mappings().first()
        return None if found is None else self._to_memory(found)

    def list_by_ids(self, ids: Sequence[str]) -> Sequence[Memory]:
        """Load memories for the given identifiers in the caller's order.

        Identifiers are stringified and de-duplicated (first occurrence wins);
        identifiers without a matching row are silently skipped.
        """
        ordered_ids = list(dict.fromkeys(str(memory_id) for memory_id in ids))
        if not ordered_ids:
            return []

        lookup = select(memories).where(memories.c.id.in_(ordered_ids))
        loaded: dict[str, Memory] = {}
        for record in self._session.execute(lookup).mappings().all():
            loaded[str(record["id"])] = self._to_memory(record)

        # Re-emit in request order; the database gives no ordering guarantee here.
        return [loaded[key] for key in ordered_ids if key in loaded]

    def _to_memory(self, row) -> Memory:
        """Translate one ``memories`` row mapping into a ``Memory`` entity."""
        entity_fields = {
            "id": row["id"],
            "repo_id": row["repo_id"],
            "scope": MemoryScope(row["scope"]),
            "kind": MemoryKind(row["kind"]),
            "text": row["text"],
            "archived": row["archived"],
        }
        return Memory(**entity_fields)

    def set_archived(self, *, memory_id: str, archived: bool) -> bool:
        """Set the archived flag on one memory.

        Returns ``True`` when the UPDATE reported a non-zero rowcount.
        """
        statement = update(memories).where(memories.c.id == memory_id).values(archived=archived)
        outcome = self._session.execute(statement)
        return bool(outcome.rowcount)

    def upsert_embedding(self, *, memory_id: str, model: str, vector: Sequence[float]) -> None:
        """Insert the embedding for a memory, or overwrite it when one already exists.

        The conflict target is ``memory_id``; on conflict the model, dimension,
        vector and ``created_at`` columns are replaced.
        """
        fresh_values = {
            "memory_id": memory_id,
            "model": model,
            "dim": len(vector),
            "vector": list(vector),
            "created_at": datetime.now(timezone.utc),
        }
        overwrite_values = {
            "model": model,
            "dim": len(vector),
            "vector": list(vector),
            "created_at": datetime.now(timezone.utc),
        }
        statement = insert(memory_embeddings).values(**fresh_values)
        statement = statement.on_conflict_do_update(
            index_elements=["memory_id"],
            set_=overwrite_values,
        )
        self._session.execute(statement)
@@ -0,0 +1,202 @@
1
+ """This module defines SQL-backed read-path visibility and explicit expansion queries."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Sequence
6
+
7
+ from sqlalchemy import literal, or_, select, union_all
8
+
9
+ from app.core.interfaces.repos import IReadPolicyRepo
10
+ from app.periphery.db.models.associations import association_edges
11
+ from app.periphery.db.models.experiences import fact_updates, problem_attempts
12
+ from app.periphery.db.models.memories import memories
13
+
14
+
15
class ReadPolicyRepo(IReadPolicyRepo):
    """Read-path repository that looks up neighbors of an anchor memory under visibility filters."""

    def __init__(self, session) -> None:
        """Remember the DB session that all read-path statements run on."""
        self._session = session

    def list_problem_attempt_neighbors(
        self,
        *,
        repo_id: str,
        include_global: bool,
        anchor_memory_id: str,
        kinds: Sequence[str] | None,
    ) -> Sequence[dict[str, Any]]:
        """Return visible memories linked to the anchor through ``problem_attempts`` rows.

        Both directions are covered: attempts whose problem is the anchor, and
        problems whose attempt is the anchor. Rows are de-duplicated, ordered by
        ``memory_id`` ascending, and carry ``memory_id`` plus an
        ``expansion_type`` of ``"problem_attempt"``.
        """
        # Direction 1: the anchor is the problem, so collect its attempts.
        attempts_joined = problem_attempts.join(memories, memories.c.id == problem_attempts.c.attempt_id)
        attempts_for_anchor = (
            select(
                problem_attempts.c.attempt_id.label("memory_id"),
                literal("problem_attempt").label("expansion_type"),
            )
            .select_from(attempts_joined)
            .where(problem_attempts.c.problem_id == anchor_memory_id)
            .where(*self._visibility_filters(repo_id=repo_id, include_global=include_global, kinds=kinds))
        )

        # Direction 2: the anchor is an attempt, so collect the problem it targets.
        problems_joined = problem_attempts.join(memories, memories.c.id == problem_attempts.c.problem_id)
        problems_for_anchor = (
            select(
                problem_attempts.c.problem_id.label("memory_id"),
                literal("problem_attempt").label("expansion_type"),
            )
            .select_from(problems_joined)
            .where(problem_attempts.c.attempt_id == anchor_memory_id)
            .where(*self._visibility_filters(repo_id=repo_id, include_global=include_global, kinds=kinds))
        )

        combined = union_all(attempts_for_anchor, problems_for_anchor).subquery()
        query = select(combined.c.memory_id, combined.c.expansion_type).distinct()
        query = query.order_by(combined.c.memory_id.asc())
        found = self._session.execute(query).mappings().all()
        return list(found)

    def list_fact_update_neighbors(
        self,
        *,
        repo_id: str,
        include_global: bool,
        anchor_memory_id: str,
        kinds: Sequence[str] | None,
    ) -> Sequence[dict[str, Any]]:
        """Return visible memories that share a ``fact_updates`` row with the anchor.

        A fact-update row references three memories (old fact, change, new fact).
        For each of those columns, the referenced memory is a neighbor when the
        anchor appears in either of the other two columns. The anchor itself is
        excluded; rows are de-duplicated, ordered by ``memory_id`` ascending, and
        carry an ``expansion_type`` of ``"fact_update"``.
        """
        linked_columns = (
            fact_updates.c.old_fact_id,
            fact_updates.c.change_id,
            fact_updates.c.new_fact_id,
        )

        branches = []
        for position, target_column in enumerate(linked_columns):
            # The two columns other than the one being selected, in table order.
            sibling_columns = linked_columns[:position] + linked_columns[position + 1 :]
            anchored_on_sibling = or_(*(column == anchor_memory_id for column in sibling_columns))
            branch = (
                select(
                    target_column.label("memory_id"),
                    literal("fact_update").label("expansion_type"),
                )
                .select_from(fact_updates.join(memories, memories.c.id == target_column))
                .where(
                    anchored_on_sibling,
                    *self._visibility_filters(repo_id=repo_id, include_global=include_global, kinds=kinds),
                )
            )
            branches.append(branch)

        combined = union_all(*branches).subquery()
        query = (
            select(combined.c.memory_id, combined.c.expansion_type)
            .where(combined.c.memory_id != anchor_memory_id)
            .distinct()
            .order_by(combined.c.memory_id.asc())
        )
        found = self._session.execute(query).mappings().all()
        return list(found)

    def list_association_neighbors(
        self,
        *,
        repo_id: str,
        include_global: bool,
        anchor_memory_id: str,
        kinds: Sequence[str] | None,
        min_strength: float,
    ) -> Sequence[dict[str, Any]]:
        """Return visible memories connected to the anchor by association edges.

        Outgoing edges of any relation type are followed; incoming edges are
        followed only when their relation type is ``"associated_with"``. Edges in
        state ``"deprecated"`` or weaker than ``min_strength`` are ignored. One
        entry per neighbor is kept (highest strength, ties broken by the smaller
        relation type), and the result is sorted by strength descending, then
        ``memory_id``, then relation type.
        """
        visibility = self._visibility_filters

        # Edges leaving the anchor: the neighbor is the edge target.
        outgoing_join = association_edges.join(memories, memories.c.id == association_edges.c.to_memory_id)
        outgoing = (
            select(
                association_edges.c.to_memory_id.label("memory_id"),
                association_edges.c.relation_type,
                association_edges.c.strength,
                literal("association").label("expansion_type"),
            )
            .select_from(outgoing_join)
            .where(
                association_edges.c.repo_id == repo_id,
                association_edges.c.from_memory_id == anchor_memory_id,
                association_edges.c.state != "deprecated",
                association_edges.c.strength >= min_strength,
                *visibility(repo_id=repo_id, include_global=include_global, kinds=kinds),
            )
        )

        # Edges arriving at the anchor: only "associated_with" is followed backwards.
        incoming_join = association_edges.join(memories, memories.c.id == association_edges.c.from_memory_id)
        incoming = (
            select(
                association_edges.c.from_memory_id.label("memory_id"),
                association_edges.c.relation_type,
                association_edges.c.strength,
                literal("association").label("expansion_type"),
            )
            .select_from(incoming_join)
            .where(
                association_edges.c.repo_id == repo_id,
                association_edges.c.to_memory_id == anchor_memory_id,
                association_edges.c.relation_type == "associated_with",
                association_edges.c.state != "deprecated",
                association_edges.c.strength >= min_strength,
                *visibility(repo_id=repo_id, include_global=include_global, kinds=kinds),
            )
        )

        combined = union_all(outgoing, incoming).subquery()
        query = (
            select(
                combined.c.memory_id,
                combined.c.relation_type,
                combined.c.strength,
                combined.c.expansion_type,
            )
            .where(combined.c.memory_id != anchor_memory_id)
            .order_by(
                combined.c.strength.desc(),
                combined.c.memory_id.asc(),
                combined.c.relation_type.asc(),
            )
        )

        # Collapse multiple edges to the same neighbor down to the single best one.
        strongest: dict[str, dict[str, Any]] = {}
        for edge in self._session.execute(query).mappings().all():
            neighbor_id = str(edge["memory_id"])
            weight = float(edge["strength"])
            relation = str(edge["relation_type"])

            incumbent = strongest.get(neighbor_id)
            if incumbent is not None:
                incumbent_weight = float(incumbent["strength"])
                is_stronger = weight > incumbent_weight
                wins_tie = weight == incumbent_weight and relation < str(incumbent["relation_type"])
                if not (is_stronger or wins_tie):
                    continue

            strongest[neighbor_id] = {
                "memory_id": neighbor_id,
                "relation_type": relation,
                "strength": weight,
                "expansion_type": str(edge["expansion_type"]),
            }

        def ranking(entry: dict[str, Any]) -> tuple[float, str, str]:
            """Sort key: strongest first, then memory id, then relation type."""
            return (-float(entry["strength"]), str(entry["memory_id"]), str(entry["relation_type"]))

        return sorted(strongest.values(), key=ranking)

    def _visibility_filters(
        self,
        *,
        repo_id: str,
        include_global: bool,
        kinds: Sequence[str] | None,
    ) -> list[Any]:
        """Assemble the ``memories`` predicates shared by every read-path query.

        A memory passes when it belongs to ``repo_id``, is not archived, and has
        scope ``"repo"`` (or ``"global"`` too when ``include_global`` is set). A
        non-empty ``kinds`` additionally restricts the memory kind.
        """
        allowed_scopes = ["repo"]
        if include_global:
            allowed_scopes.append("global")

        conditions: list[Any] = [
            memories.c.repo_id == repo_id,
            memories.c.archived.is_(False),
            memories.c.scope.in_(allowed_scopes),
        ]
        if kinds:
            conditions.append(memories.c.kind.in_(list(kinds)))
        return conditions
@@ -0,0 +1,161 @@
1
+ """Relational repository for low-overhead telemetry persistence."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import asdict
6
+ from datetime import datetime, timezone
7
+
8
+ from sqlalchemy import delete, func, select, update
9
+
10
+ from app.core.entities.guidance import PendingUtilityCandidate
11
+ from app.core.entities.telemetry import (
12
+ EpisodeSyncRunRecord,
13
+ EpisodeSyncToolTypeRecord,
14
+ OperationInvocationRecord,
15
+ ReadResultItemRecord,
16
+ ReadSummaryRecord,
17
+ WriteEffectItemRecord,
18
+ WriteSummaryRecord,
19
+ )
20
+ from app.periphery.db.models.memories import memories
21
+ from app.core.interfaces.repos import ITelemetryRepo
22
+ from app.periphery.db.models.telemetry import (
23
+ episode_sync_runs,
24
+ episode_sync_tool_types,
25
+ operation_invocations,
26
+ read_invocation_summaries,
27
+ read_result_items,
28
+ write_effect_items,
29
+ write_invocation_summaries,
30
+ )
31
+ from app.periphery.db.models.utility import utility_observations
32
+
33
+
34
class TelemetryRepo(ITelemetryRepo):
    """Relational persistence for operation, read/write summary and sync-run telemetry."""

    def __init__(self, session) -> None:
        """Hold on to the session that telemetry statements are executed with."""
        self._session = session

    def insert_operation_invocation(self, record: OperationInvocationRecord) -> None:
        """Insert a single ``operation_invocations`` row from the record's fields."""
        row_values = asdict(record)
        self._session.execute(operation_invocations.insert().values(**row_values))

    def insert_read_summary(
        self,
        summary: ReadSummaryRecord,
        items: tuple[ReadResultItemRecord, ...] | list[ReadResultItemRecord],
    ) -> None:
        """Replace the read summary and result items stored for one invocation.

        Existing item rows and the existing summary row for the invocation are
        deleted first, then the new summary and (if any) new items are inserted.
        """
        target_invocation = summary.invocation_id
        run = self._session.execute

        # Items are removed before the summary row, then both are re-created.
        run(delete(read_result_items).where(read_result_items.c.invocation_id == target_invocation))
        run(delete(read_invocation_summaries).where(read_invocation_summaries.c.invocation_id == target_invocation))
        run(read_invocation_summaries.insert().values(**asdict(summary)))
        if items:
            item_rows = [asdict(item) for item in items]
            run(read_result_items.insert(), item_rows)

    def insert_write_summary(
        self,
        summary: WriteSummaryRecord,
        items: tuple[WriteEffectItemRecord, ...] | list[WriteEffectItemRecord],
    ) -> None:
        """Replace the write summary and effect items stored for one invocation.

        Existing item rows and the existing summary row for the invocation are
        deleted first, then the new summary and (if any) new items are inserted.
        """
        target_invocation = summary.invocation_id
        run = self._session.execute

        # Items are removed before the summary row, then both are re-created.
        run(delete(write_effect_items).where(write_effect_items.c.invocation_id == target_invocation))
        run(delete(write_invocation_summaries).where(write_invocation_summaries.c.invocation_id == target_invocation))
        run(write_invocation_summaries.insert().values(**asdict(summary)))
        if items:
            item_rows = [asdict(item) for item in items]
            run(write_effect_items.insert(), item_rows)

    def insert_episode_sync_run(
        self,
        run: EpisodeSyncRunRecord,
        tool_types: tuple[EpisodeSyncToolTypeRecord, ...] | list[EpisodeSyncToolTypeRecord],
    ) -> None:
        """Insert one sync-run row followed by its per-tool-type rows, if any."""
        self._session.execute(episode_sync_runs.insert().values(**asdict(run)))
        if not tool_types:
            return
        tool_rows = [asdict(entry) for entry in tool_types]
        self._session.execute(episode_sync_tool_types.insert(), tool_rows)

    def update_operation_polling(self, invocation_id: str, *, attempted: bool, started: bool) -> None:
        """Write the poller-start flags onto the invocation row with the given id."""
        polling_flags = {
            "poller_start_attempted": attempted,
            "poller_started": started,
        }
        statement = (
            update(operation_invocations)
            .where(operation_invocations.c.id == invocation_id)
            .values(**polling_flags)
        )
        self._session.execute(statement)

    def list_pending_utility_candidates(
        self,
        *,
        repo_id: str,
        caller_id: str,
        problem_id: str,
        since_iso: str,
    ) -> list[PendingUtilityCandidate]:
        """List retrieved memories that have no utility observation for ``problem_id``.

        Only result items of ``read`` invocations with outcome ``ok`` are
        considered, restricted to ``repo_id``, to invocations whose selected
        thread is ``caller_id``, and to invocations created at or after
        ``since_iso``. The problem memory itself is excluded. Candidates are
        ordered by retrieval count descending, then by most recent retrieval.
        """
        cutoff = _parse_iso(since_iso)

        # Left-join votes for this problem so un-voted memories surface as NULL.
        vote_for_problem = (utility_observations.c.memory_id == read_result_items.c.memory_id) & (
            utility_observations.c.problem_id == problem_id
        )
        source = (
            read_result_items.join(
                operation_invocations,
                operation_invocations.c.id == read_result_items.c.invocation_id,
            )
            .join(memories, memories.c.id == read_result_items.c.memory_id)
            .outerjoin(utility_observations, vote_for_problem)
        )

        query = (
            select(
                read_result_items.c.memory_id,
                func.max(read_result_items.c.kind).label("kind"),
                func.count().label("retrieval_count"),
                func.max(operation_invocations.c.created_at).label("last_seen_at"),
            )
            .select_from(source)
            .where(
                operation_invocations.c.repo_id == repo_id,
                operation_invocations.c.command == "read",
                operation_invocations.c.outcome == "ok",
                operation_invocations.c.selected_thread_id == caller_id,
                operation_invocations.c.created_at >= cutoff,
                read_result_items.c.memory_id != problem_id,
                utility_observations.c.id.is_(None),
            )
            .group_by(read_result_items.c.memory_id)
            .order_by(
                func.count().desc(),
                func.max(operation_invocations.c.created_at).desc(),
            )
        )

        candidates: list[PendingUtilityCandidate] = []
        for record in self._session.execute(query).mappings().all():
            candidates.append(
                PendingUtilityCandidate(
                    memory_id=str(record["memory_id"]),
                    kind=str(record["kind"]),
                    retrieval_count=int(record["retrieval_count"]),
                    last_seen_at=record["last_seen_at"].isoformat(),
                )
            )
        return candidates
153
+
154
+
155
+ def _parse_iso(value: str) -> datetime:
156
+ """Parse one ISO timestamp into a timezone-aware datetime."""
157
+
158
+ parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
159
+ if parsed.tzinfo is None:
160
+ return parsed.replace(tzinfo=timezone.utc)
161
+ return parsed.astimezone(timezone.utc)
@@ -0,0 +1,30 @@
1
+ """This module defines relational repository operations for utility feedback entries."""
2
+
3
+ from datetime import datetime, timezone
4
+
5
+ from app.core.entities.utility import UtilityObservation
6
+ from app.core.interfaces.repos import IUtilityRepo
7
+ from app.periphery.db.models.utility import utility_observations
8
+
9
+
10
class UtilityRepo(IUtilityRepo):
    """Relational repository that stores utility observations."""

    def __init__(self, session) -> None:
        """Keep the DB session used to execute utility statements."""
        self._session = session

    def append_observation(self, observation: UtilityObservation) -> None:
        """Insert one utility observation row, stamped with the current UTC time."""
        row_values = {
            "id": observation.id,
            "memory_id": observation.memory_id,
            "problem_id": observation.problem_id,
            "vote": observation.vote,
            "rationale": observation.rationale,
            "created_at": datetime.now(timezone.utc),
        }
        statement = utility_observations.insert().values(**row_values)
        self._session.execute(statement)
@@ -0,0 +1 @@
1
+ """This package contains semantic retrieval repository classes."""
@@ -0,0 +1,63 @@
1
+ """This module defines keyword-lane retrieval operations backed by app-side BM25."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Literal, Sequence
6
+
7
+ from sqlalchemy import select
8
+
9
+ from app.core.policies.read_policy.bm25 import BM25Document, admit_scored_documents, score_documents
10
+ from app.core.policies.read_policy.lexical_query import build_lexical_query, normalize_lexical_text
11
+ from app.core.interfaces.repos import IKeywordRetrievalRepo
12
+ from app.periphery.db.models.memories import memories
13
+
14
+
15
class KeywordRetrievalRepo(IKeywordRetrievalRepo):
    """Keyword-lane repository: loads candidate texts from SQL and ranks them with BM25 in-process."""

    def __init__(self, session) -> None:
        """Keep the DB session used to load keyword candidates."""
        self._session = session

    def query_keyword(
        self,
        *,
        repo_id: str,
        mode: Literal["ambient", "targeted"],
        include_global: bool,
        query_text: str,
        kinds: Sequence[str] | None,
        limit: int,
    ) -> Sequence[dict[str, Any]]:
        """Score non-archived memories against ``query_text`` and return the admitted candidates.

        Returns an empty list when the lexical query has no terms. Otherwise all
        memories of ``repo_id`` in the allowed scopes (optionally restricted to
        ``kinds``) are loaded in id order, scored by ``score_documents``,
        filtered by ``admit_scored_documents`` for ``mode``, and cut to
        ``limit`` entries.
        """
        parsed_query = build_lexical_query(query_text)
        if not parsed_query.terms:
            return []

        allowed_scopes = ["repo"]
        if include_global:
            allowed_scopes.append("global")

        predicates = [
            memories.c.repo_id == repo_id,
            memories.c.archived.is_(False),
            memories.c.scope.in_(allowed_scopes),
        ]
        if kinds:
            predicates.append(memories.c.kind.in_(list(kinds)))

        candidate_query = (
            select(memories.c.id.label("memory_id"), memories.c.text)
            .where(*predicates)
            .order_by(memories.c.id.asc())
        )

        corpus = []
        for record in self._session.execute(candidate_query).mappings().all():
            normalized = normalize_lexical_text(str(record["text"]))
            corpus.append(
                BM25Document(
                    memory_id=str(record["memory_id"]),
                    terms=normalized.terms_for(parsed_query),
                )
            )

        ranked = score_documents(parsed_query.terms, corpus)
        admitted = admit_scored_documents(ranked, mode=mode)
        return admitted[:limit]
@@ -0,0 +1,111 @@
1
+ """This module defines semantic-lane retrieval operations over stored shellbrain embeddings."""
2
+
3
+ from math import sqrt
4
+ from typing import Any, Sequence
5
+
6
+ from sqlalchemy import select
7
+
8
+ from app.core.interfaces.repos import ISemanticRetrievalRepo
9
+ from app.periphery.db.models.memories import memories, memory_embeddings
10
+
11
+
12
class SemanticRetrievalRepo(ISemanticRetrievalRepo):
    """Semantic-lane repository that ranks stored embeddings by cosine similarity in-process."""

    def __init__(self, session) -> None:
        """Keep the DB session used to load embedding rows."""
        self._session = session

    def query_semantic(
        self,
        *,
        repo_id: str,
        include_global: bool,
        query_vector: Sequence[float],
        kinds: Sequence[str] | None,
        limit: int,
    ) -> Sequence[dict[str, Any]]:
        """Rank visible embedded memories against ``query_vector``.

        Returns an empty list for an empty query vector. Candidates whose cosine
        similarity is zero or negative are dropped; the rest are ordered by score
        descending, then ``memory_id``, and cut to ``limit`` entries.
        """
        if not query_vector:
            return []

        probe = list(query_vector)
        candidates = self._visible_embedding_rows(repo_id=repo_id, include_global=include_global, kinds=kinds)

        hits: list[dict[str, Any]] = []
        for candidate in candidates:
            similarity = _cosine_similarity(probe, candidate["vector"])
            if similarity <= 0.0:
                continue
            hits.append({"memory_id": candidate["memory_id"], "score": similarity})

        ranked = sorted(hits, key=lambda hit: (-float(hit["score"]), str(hit["memory_id"])))
        return ranked[:limit]

    def list_semantic_neighbors(
        self,
        *,
        repo_id: str,
        include_global: bool,
        anchor_memory_id: str,
        kinds: Sequence[str] | None,
        limit: int | None = None,
    ) -> Sequence[dict[str, Any]]:
        """Rank visible embedded memories by similarity to the anchor's own embedding.

        The anchor's vector is looked up among the visible rows; when it is not
        there, the result is empty. Neighbors with zero or negative similarity are
        dropped, the rest are ordered by score descending, then ``memory_id``.
        ``limit=None`` returns every neighbor.
        """
        candidates = self._visible_embedding_rows(repo_id=repo_id, include_global=include_global, kinds=kinds)

        # Take the first visible row that belongs to the anchor.
        anchor_vector = None
        for candidate in candidates:
            if candidate["memory_id"] == anchor_memory_id:
                anchor_vector = candidate["vector"]
                break
        if anchor_vector is None:
            return []

        hits: list[dict[str, Any]] = []
        for candidate in candidates:
            if candidate["memory_id"] == anchor_memory_id:
                continue
            similarity = _cosine_similarity(anchor_vector, candidate["vector"])
            if similarity <= 0.0:
                continue
            hits.append({"memory_id": candidate["memory_id"], "score": similarity})

        ranked = sorted(hits, key=lambda hit: (-float(hit["score"]), str(hit["memory_id"])))
        return ranked if limit is None else ranked[:limit]

    def _visible_embedding_rows(self, *, repo_id: str, include_global: bool, kinds: Sequence[str] | None) -> list[dict[str, Any]]:
        """Load ``memory_id``/``vector`` pairs for non-archived memories that have an embedding.

        Rows are limited to ``repo_id`` and the allowed scopes, and to ``kinds``
        when it is non-empty. Ids are returned as strings and vector components
        as floats.
        """
        allowed_scopes = ["repo"]
        if include_global:
            allowed_scopes.append("global")

        predicates = [
            memories.c.repo_id == repo_id,
            memories.c.archived.is_(False),
            memories.c.scope.in_(allowed_scopes),
        ]
        if kinds:
            predicates.append(memories.c.kind.in_(list(kinds)))

        embedded = memories.join(memory_embeddings, memory_embeddings.c.memory_id == memories.c.id)
        query = (
            select(memories.c.id.label("memory_id"), memory_embeddings.c.vector)
            .select_from(embedded)
            .where(*predicates)
        )

        visible: list[dict[str, Any]] = []
        for record in self._session.execute(query).mappings().all():
            visible.append(
                {
                    "memory_id": str(record["memory_id"]),
                    "vector": [float(component) for component in record["vector"]],
                }
            )
        return visible
99
+
100
+
101
+ def _cosine_similarity(left: list[float], right: list[float]) -> float:
102
+ """Compute cosine similarity for semantic retrieval ranking and gating."""
103
+
104
+ if len(left) != len(right):
105
+ return 0.0
106
+ left_norm = sqrt(sum(value * value for value in left))
107
+ right_norm = sqrt(sum(value * value for value in right))
108
+ if left_norm == 0 or right_norm == 0:
109
+ return 0.0
110
+ dot = sum(left_value * right_value for left_value, right_value in zip(left, right, strict=True))
111
+ return dot / (left_norm * right_norm)
@@ -0,0 +1,10 @@
1
+ """This module defines SQLAlchemy session-factory helpers for repository execution."""
2
+
3
+ from sqlalchemy.engine import Engine
4
+ from sqlalchemy.orm import Session, sessionmaker
5
+
6
+
7
def get_session_factory(engine: Engine) -> sessionmaker[Session]:
    """Build a ``sessionmaker`` bound to ``engine``.

    Sessions from this factory are created with ``future=True`` and
    ``expire_on_commit=False``.
    """
    factory_options = {
        "bind": engine,
        "future": True,
        "expire_on_commit": False,
    }
    return sessionmaker(**factory_options)