depthfusion 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219) hide show
  1. depthfusion/__init__.py +0 -0
  2. depthfusion/analytics/__init__.py +15 -0
  3. depthfusion/analytics/aggregation.py +336 -0
  4. depthfusion/analytics/budget.py +358 -0
  5. depthfusion/analytics/collector.py +160 -0
  6. depthfusion/analytics/model_stats.py +352 -0
  7. depthfusion/analytics/recommender.py +294 -0
  8. depthfusion/analytics/router.py +342 -0
  9. depthfusion/analytics/store.py +73 -0
  10. depthfusion/analyzer/__init__.py +0 -0
  11. depthfusion/analyzer/compatibility.py +348 -0
  12. depthfusion/analyzer/installer.py +179 -0
  13. depthfusion/analyzer/recommender.py +35 -0
  14. depthfusion/analyzer/scanner.py +127 -0
  15. depthfusion/api/__init__.py +0 -0
  16. depthfusion/api/admin_console.py +795 -0
  17. depthfusion/api/auth.py +164 -0
  18. depthfusion/api/events.py +313 -0
  19. depthfusion/api/query.py +417 -0
  20. depthfusion/api/rest.py +1028 -0
  21. depthfusion/api/role_admin.py +254 -0
  22. depthfusion/audit/__init__.py +16 -0
  23. depthfusion/audit/log.py +353 -0
  24. depthfusion/authz/__init__.py +52 -0
  25. depthfusion/authz/capability_check.py +191 -0
  26. depthfusion/authz/classification.py +201 -0
  27. depthfusion/authz/export_audit.py +525 -0
  28. depthfusion/authz/export_controls.py +217 -0
  29. depthfusion/authz/frontmatter.py +177 -0
  30. depthfusion/authz/label_mapping.py +263 -0
  31. depthfusion/authz/policy_engine.py +605 -0
  32. depthfusion/authz/policy_snapshot.py +340 -0
  33. depthfusion/authz/roles.py +327 -0
  34. depthfusion/backends/__init__.py +29 -0
  35. depthfusion/backends/base.py +139 -0
  36. depthfusion/backends/chain.py +266 -0
  37. depthfusion/backends/factory.py +267 -0
  38. depthfusion/backends/gemma.py +302 -0
  39. depthfusion/backends/haiku.py +297 -0
  40. depthfusion/backends/local_embedding.py +202 -0
  41. depthfusion/backends/null.py +65 -0
  42. depthfusion/backends/openrouter.py +132 -0
  43. depthfusion/cache/__init__.py +110 -0
  44. depthfusion/cache/activity_signals.py +322 -0
  45. depthfusion/cache/admission.py +278 -0
  46. depthfusion/cache/hit_rate.py +288 -0
  47. depthfusion/cache/lease_lifecycle.py +888 -0
  48. depthfusion/cache/manager.py +371 -0
  49. depthfusion/cache/models.py +102 -0
  50. depthfusion/cache/prefetch_scheduler.py +254 -0
  51. depthfusion/capture/__init__.py +0 -0
  52. depthfusion/capture/_metrics.py +66 -0
  53. depthfusion/capture/auto_learn.py +515 -0
  54. depthfusion/capture/compressor.py +141 -0
  55. depthfusion/capture/decay.py +266 -0
  56. depthfusion/capture/decision_extractor.py +394 -0
  57. depthfusion/capture/dedup.py +404 -0
  58. depthfusion/capture/event_hook.py +62 -0
  59. depthfusion/capture/negative_extractor.py +369 -0
  60. depthfusion/capture/pruner.py +344 -0
  61. depthfusion/cli/__init__.py +1 -0
  62. depthfusion/cli/devices.py +143 -0
  63. depthfusion/cli/migrate.py +438 -0
  64. depthfusion/cli/roles.py +303 -0
  65. depthfusion/cognitive/__init__.py +0 -0
  66. depthfusion/cognitive/consolidator.py +60 -0
  67. depthfusion/cognitive/contradiction.py +95 -0
  68. depthfusion/cognitive/scorer.py +56 -0
  69. depthfusion/connectors/__init__.py +19 -0
  70. depthfusion/connectors/sharepoint.py +756 -0
  71. depthfusion/connectors/sharepoint_scheduler.py +149 -0
  72. depthfusion/connectors/sharepoint_scope.py +186 -0
  73. depthfusion/connectors/sharepoint_state.py +128 -0
  74. depthfusion/core/__init__.py +0 -0
  75. depthfusion/core/config.py +256 -0
  76. depthfusion/core/event_store.py +658 -0
  77. depthfusion/core/feedback.py +317 -0
  78. depthfusion/core/file_locking.py +266 -0
  79. depthfusion/core/hit_tracker.py +123 -0
  80. depthfusion/core/memory.py +59 -0
  81. depthfusion/core/memory_object.py +187 -0
  82. depthfusion/core/project_context.py +122 -0
  83. depthfusion/core/project_ingest.py +204 -0
  84. depthfusion/core/project_registry.py +59 -0
  85. depthfusion/core/research.py +181 -0
  86. depthfusion/core/scoring.py +82 -0
  87. depthfusion/core/types.py +216 -0
  88. depthfusion/fusion/__init__.py +0 -0
  89. depthfusion/fusion/block_retrieval.py +192 -0
  90. depthfusion/fusion/chunk_state_compression.py +190 -0
  91. depthfusion/fusion/gates.py +460 -0
  92. depthfusion/fusion/materialisation_policy.py +210 -0
  93. depthfusion/fusion/reranker.py +68 -0
  94. depthfusion/fusion/rrf.py +47 -0
  95. depthfusion/fusion/selective_fusion_weighter.py +316 -0
  96. depthfusion/fusion/weighted.py +118 -0
  97. depthfusion/graph/__init__.py +1 -0
  98. depthfusion/graph/builder.py +102 -0
  99. depthfusion/graph/dedup.py +165 -0
  100. depthfusion/graph/extractor.py +234 -0
  101. depthfusion/graph/linker.py +339 -0
  102. depthfusion/graph/scope.py +43 -0
  103. depthfusion/graph/store.py +610 -0
  104. depthfusion/graph/traverser.py +196 -0
  105. depthfusion/graph/types.py +91 -0
  106. depthfusion/hooks/__init__.py +0 -0
  107. depthfusion/hooks/git_post_commit.py +249 -0
  108. depthfusion/hooks/post_tool_use.py +296 -0
  109. depthfusion/hooks/session_start.py +162 -0
  110. depthfusion/identity/__init__.py +59 -0
  111. depthfusion/identity/device_keychain.py +451 -0
  112. depthfusion/identity/device_lease.py +222 -0
  113. depthfusion/identity/device_registry.py +239 -0
  114. depthfusion/identity/errors.py +55 -0
  115. depthfusion/identity/fastapi_deps.py +117 -0
  116. depthfusion/identity/jwks_cache.py +159 -0
  117. depthfusion/identity/legacy_shim.py +204 -0
  118. depthfusion/identity/models.py +81 -0
  119. depthfusion/identity/oidc_client.py +483 -0
  120. depthfusion/identity/principal_store.py +156 -0
  121. depthfusion/identity/service_account.py +204 -0
  122. depthfusion/identity/token_validator.py +232 -0
  123. depthfusion/ingest/__init__.py +33 -0
  124. depthfusion/ingest/chunking.py +149 -0
  125. depthfusion/ingest/models.py +46 -0
  126. depthfusion/ingest/parser.py +294 -0
  127. depthfusion/ingest/pipeline.py +256 -0
  128. depthfusion/install/__init__.py +0 -0
  129. depthfusion/install/dep_checker.py +111 -0
  130. depthfusion/install/gpu_probe.py +238 -0
  131. depthfusion/install/install.py +1153 -0
  132. depthfusion/install/migrate.py +68 -0
  133. depthfusion/install/smoke.py +248 -0
  134. depthfusion/install/ui_server.py +399 -0
  135. depthfusion/mcp/__init__.py +0 -0
  136. depthfusion/mcp/authz.py +195 -0
  137. depthfusion/mcp/cognitive_tools.py +79 -0
  138. depthfusion/mcp/http_server.py +257 -0
  139. depthfusion/mcp/server.py +418 -0
  140. depthfusion/mcp/skillforge_client.py +86 -0
  141. depthfusion/mcp/tools/__init__.py +23 -0
  142. depthfusion/mcp/tools/_registry.py +603 -0
  143. depthfusion/mcp/tools/_shared.py +718 -0
  144. depthfusion/mcp/tools/_state.py +139 -0
  145. depthfusion/mcp/tools/analytics_tools.py +9 -0
  146. depthfusion/mcp/tools/bridge.py +90 -0
  147. depthfusion/mcp/tools/capture.py +364 -0
  148. depthfusion/mcp/tools/decisions.py +206 -0
  149. depthfusion/mcp/tools/graph.py +509 -0
  150. depthfusion/mcp/tools/model_stats_tool.py +24 -0
  151. depthfusion/mcp/tools/project.py +250 -0
  152. depthfusion/mcp/tools/recall.py +219 -0
  153. depthfusion/mcp/tools/recommender_tools.py +60 -0
  154. depthfusion/mcp/tools/system.py +85 -0
  155. depthfusion/mcp/tools/telemetry.py +341 -0
  156. depthfusion/mcp/tools/telemetry_tools.py +43 -0
  157. depthfusion/metrics/__init__.py +0 -0
  158. depthfusion/metrics/aggregator.py +341 -0
  159. depthfusion/metrics/collector.py +522 -0
  160. depthfusion/migrations/__init__.py +11 -0
  161. depthfusion/parsers/__init__.py +69 -0
  162. depthfusion/parsers/base.py +49 -0
  163. depthfusion/parsers/chatgpt.py +107 -0
  164. depthfusion/parsers/deepseek.py +103 -0
  165. depthfusion/parsers/documents/__init__.py +107 -0
  166. depthfusion/parsers/documents/base.py +354 -0
  167. depthfusion/parsers/documents/docx.py +122 -0
  168. depthfusion/parsers/documents/generic.py +173 -0
  169. depthfusion/parsers/documents/ocr.py +191 -0
  170. depthfusion/parsers/documents/pdf.py +113 -0
  171. depthfusion/parsers/documents/pptx.py +103 -0
  172. depthfusion/parsers/documents/xlsx.py +150 -0
  173. depthfusion/parsers/gemini.py +82 -0
  174. depthfusion/parsers/generic.py +138 -0
  175. depthfusion/recursive/__init__.py +0 -0
  176. depthfusion/recursive/client.py +346 -0
  177. depthfusion/recursive/sandbox.py +78 -0
  178. depthfusion/recursive/sidecar.py +79 -0
  179. depthfusion/recursive/strategies.py +45 -0
  180. depthfusion/recursive/trajectory.py +40 -0
  181. depthfusion/retrieval/__init__.py +17 -0
  182. depthfusion/retrieval/acl_verifier.py +204 -0
  183. depthfusion/retrieval/bm25.py +130 -0
  184. depthfusion/retrieval/hnsw_store.py +509 -0
  185. depthfusion/retrieval/hybrid.py +942 -0
  186. depthfusion/retrieval/reranker.py +99 -0
  187. depthfusion/router/__init__.py +0 -0
  188. depthfusion/router/bus.py +302 -0
  189. depthfusion/router/cost_estimator.py +83 -0
  190. depthfusion/router/dispatcher.py +49 -0
  191. depthfusion/router/publisher.py +35 -0
  192. depthfusion/router/subscriber.py +17 -0
  193. depthfusion/session/__init__.py +0 -0
  194. depthfusion/session/compactor.py +91 -0
  195. depthfusion/session/loader.py +84 -0
  196. depthfusion/session/scorer.py +69 -0
  197. depthfusion/session/tagger.py +169 -0
  198. depthfusion/storage/__init__.py +3 -0
  199. depthfusion/storage/event_log.py +92 -0
  200. depthfusion/storage/file_index.py +318 -0
  201. depthfusion/storage/memory_store.py +306 -0
  202. depthfusion/storage/telemetry_store.py +336 -0
  203. depthfusion/storage/tier_manager.py +66 -0
  204. depthfusion/storage/vector_store.py +238 -0
  205. depthfusion/sync/__init__.py +4 -0
  206. depthfusion/sync/engine.py +497 -0
  207. depthfusion/sync/router.py +177 -0
  208. depthfusion/telemetry/__init__.py +2 -0
  209. depthfusion/telemetry/recorder.py +142 -0
  210. depthfusion/telemetry/schema.py +65 -0
  211. depthfusion/utils/__init__.py +0 -0
  212. depthfusion/utils/expression_eval.py +257 -0
  213. depthfusion/utils/mode.py +31 -0
  214. depthfusion-2.0.0.dist-info/METADATA +80 -0
  215. depthfusion-2.0.0.dist-info/RECORD +219 -0
  216. depthfusion-2.0.0.dist-info/WHEEL +5 -0
  217. depthfusion-2.0.0.dist-info/entry_points.txt +2 -0
  218. depthfusion-2.0.0.dist-info/licenses/LICENSE +21 -0
  219. depthfusion-2.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,358 @@
1
+ """Budget-aware model selection for the PM dispatch cycle — S-211 (T-722..T-724).
2
+
3
+ This module closes the budget feedback loop for ``/digittal-method`` runs:
4
+
5
+ 1. ``Budget`` tracks a spend cap and accumulated spend, exposing
6
+ ``remaining()`` (AC-5, T-723).
7
+ 2. ``select_model_for_task`` calls the DepthFusion recommender with the
8
+ per-task ``budget_usd`` and Fable-5 ``exclude_vendors`` filter, returning
9
+ the chosen model plus a ``budget_warning`` flag (T-720 integration).
10
+ 3. ``budget_alert`` checks, *before* each dispatch, whether the remaining
11
+ budget can afford the cheapest eligible model (T-723, AC-5). When it
12
+ cannot, the PM surfaces an alert instead of dispatching into an OOM.
13
+ 4. ``log_dispatch_outcome`` records the *actual* verdict and cost after the
14
+ agent completes via ``record_model_telemetry`` — closing the feedback loop
15
+ (T-722, AC-4) — and debits the real cost from the budget.
16
+ 5. ``build_budget_summary`` produces the human-readable spend-vs-baseline
17
+ report served by ``GET /api/budget-summary`` (T-724, AC-6).
18
+
19
+ The module has no hard dependency on the MCP transport: ``record_model_telemetry``
20
+ is the same callable used by the MCP tool, so the feedback loop works whether
21
+ the PM is local or remote.
22
+ """
23
+ from __future__ import annotations
24
+
25
+ from dataclasses import dataclass, field
26
+ from typing import Any, Callable, Optional
27
+
28
+ from depthfusion.analytics.model_stats import get_model_stats
29
+ from depthfusion.analytics.recommender import recommend, vendor_for_model
30
+
31
+ # The default ("sonnet baseline") used by build_budget_summary to compute how
32
+ # much each non-baseline choice saved or cost relative to always picking
33
+ # Sonnet. Matches the fable5 tier table default dev model.
34
+ DEFAULT_BASELINE_MODEL = "claude-sonnet-4"
35
+
36
+
37
+ # ---------------------------------------------------------------------------
38
+ # Budget tracking
39
+ # ---------------------------------------------------------------------------
40
+
41
@dataclass
class Budget:
    """Track a spend cap and the spend accumulated across a dispatch run.

    Parameters
    ----------
    cap_usd:
        The total spend cap for the run (e.g. ``$5`` for a 5-task run).
    spent_usd:
        Spend accumulated so far (debited via :meth:`debit`).
    """

    cap_usd: float
    spent_usd: float = 0.0
    # One record per debit() call; exposed as copies through ``entries``.
    _entries: list[dict[str, Any]] = field(default_factory=list)

    def remaining(self) -> float:
        """Return the USD left under the cap, never below zero.

        An overspent budget reports ``0.0`` rather than a negative amount;
        the size of the overspend is still available as
        ``spent_usd - cap_usd``.
        """
        return max(0.0, self.cap_usd - self.spent_usd)

    def debit(self, cost_usd: float, *, model_id: str = "", task: str = "") -> float:
        """Debit *cost_usd* from the budget and return the new remaining.

        Negative costs are clamped to 0 to avoid crediting the budget on a
        malformed outcome record. Each call appends one record to
        :attr:`entries`.
        """
        cost = max(0.0, float(cost_usd))
        self.spent_usd += cost
        self._entries.append({"model_id": model_id, "task": task, "cost_usd": cost})
        return self.remaining()

    @property
    def entries(self) -> list[dict[str, Any]]:
        """Per-dispatch spend records (model_id, task, cost_usd).

        Returns a new list on every access, so appending to or removing from
        the result does not affect the budget's own ledger.
        """
        return list(self._entries)
76
+
77
+
78
+ # ---------------------------------------------------------------------------
79
+ # Eligible-model cost helpers (T-723)
80
+ # ---------------------------------------------------------------------------
81
+
82
def _eligible_stats(
    *,
    exclude_vendors: Optional[list[str]] = None,
    available_models: Optional[list[str]] = None,
) -> list[dict[str, Any]]:
    """Collect one stat row for each candidate model that survives exclusion.

    Candidates are *available_models* (de-duplicated, order kept) when given,
    otherwise every ``model_id`` found in ``get_model_stats()``, sorted. A
    candidate is dropped when ``vendor_for_model`` returns a vendor listed in
    *exclude_vendors* (compared against the lower-cased exclusion names). For
    the rest, the first ``observed`` row wins, then the first ``prior`` row;
    models with neither are omitted.
    """
    banned = {vendor.lower() for vendor in (exclude_vendors or [])}
    stat_rows = get_model_stats()

    if available_models:
        # dict.fromkeys drops duplicates while keeping caller order.
        model_ids = list(dict.fromkeys(available_models))
    else:
        model_ids = sorted({row["model_id"] for row in stat_rows})

    picked: list[dict[str, Any]] = []
    for mid in model_ids:
        if vendor_for_model(mid) in banned:
            continue
        own_rows = [row for row in stat_rows if row["model_id"] == mid]
        # Preference order: measured data first, seeded priors second.
        for wanted in ("observed", "prior"):
            matching = [row for row in own_rows if row["source"] == wanted]
            if matching:
                picked.append(matching[0])
                break
    return picked
111
+
112
+
113
def min_eligible_cost(
    *,
    exclude_vendors: Optional[list[str]] = None,
    available_models: Optional[list[str]] = None,
) -> Optional[float]:
    """Return the lowest ``avg_cost_usd`` across eligible models.

    Rows whose ``avg_cost_usd`` is missing or ``None`` are ignored. The result
    is ``None`` when no eligible row carries a cost.
    """
    eligible = _eligible_stats(
        exclude_vendors=exclude_vendors,
        available_models=available_models,
    )
    prices = [
        float(row["avg_cost_usd"])
        for row in eligible
        if row.get("avg_cost_usd") is not None
    ]
    return min(prices, default=None)
124
+
125
+
126
def budget_alert(
    budget: Budget,
    *,
    exclude_vendors: Optional[list[str]] = None,
    available_models: Optional[list[str]] = None,
) -> dict[str, Any]:
    """Check, before a dispatch, that the budget covers some model (T-723, AC-5).

    The result has the shape
    ``{"ok": bool, "remaining": float, "min_cost": float|None, "message": str}``.
    ``ok`` is ``False`` either when no eligible model has a known cost or when
    ``budget.remaining()`` is below the cheapest eligible cost; in both cases
    the caller is expected to surface the message instead of dispatching.
    """
    left = budget.remaining()
    cheapest = min_eligible_cost(
        exclude_vendors=exclude_vendors,
        available_models=available_models,
    )

    # Guard: nothing to compare against once exclusion leaves no priced model.
    if cheapest is None:
        return {
            "ok": False,
            "remaining": left,
            "min_cost": None,
            "message": "No eligible models available after vendor exclusion.",
        }

    affordable = left >= cheapest
    if not affordable:
        text = (
            f"Budget alert: ${left:.4f} remaining is below the cheapest "
            f"eligible model (${cheapest:.4f}). Halt dispatch and surface to user."
        )
    else:
        text = (
            f"Budget OK: ${left:.4f} remaining covers cheapest eligible "
            f"model at ${cheapest:.4f}."
        )
    return {
        "ok": affordable,
        "remaining": left,
        "min_cost": cheapest,
        "message": text,
    }
161
+
162
+
163
+ # ---------------------------------------------------------------------------
164
+ # Model selection (wraps the recommender with budget) — feeds the PM cycle
165
+ # ---------------------------------------------------------------------------
166
+
167
def select_model_for_task(
    *,
    task_category: str,
    budget: Budget,
    remaining_tasks: int,
    exclude_vendors: Optional[list[str]] = None,
    available_models: Optional[list[str]] = None,
    context: str = "",
) -> Optional[dict[str, Any]]:
    """Choose the top recommendation for the next task under a per-task budget.

    The per-task ``budget_usd`` handed to ``recommend`` is
    ``budget.remaining()`` divided by *remaining_tasks* (AC-2). The task count
    is clamped to at least 1, so the last task (or a zero/negative count) gets
    the whole remaining budget. Returns the first recommendation, or ``None``
    when the recommender yields nothing.
    """
    task_count = max(1, int(remaining_tasks))
    ranked = recommend(
        task_category=task_category,
        context=context,
        exclude_vendors=exclude_vendors,
        available_models=available_models,
        budget_usd=budget.remaining() / task_count,
    )
    return ranked[0] if ranked else None
195
+
196
+
197
+ # ---------------------------------------------------------------------------
198
+ # Outcome logging (T-722, AC-4) — closes the feedback loop
199
+ # ---------------------------------------------------------------------------
200
+
201
def log_dispatch_outcome(
    *,
    session_id: str,
    model_id: str,
    task_category: str,
    quality_verdict: str,
    cost_usd: float,
    tokens_in: int = 0,
    tokens_out: int = 0,
    latency_ms: int = 0,
    project_slug: Optional[str] = None,
    budget: Optional[Budget] = None,
    recorder: Optional[Callable[[dict], dict]] = None,
) -> dict[str, Any]:
    """Record the actual dispatch outcome to DepthFusion telemetry (AC-4).

    Builds a telemetry event from the arguments, passes it to *recorder*
    (``record_model_telemetry`` by default; injectable for testing), debits
    *cost_usd* from *budget* when one is supplied, and returns the recorder
    result.

    The debit is applied whether or not the telemetry write succeeded: the
    dispatch has already happened and its cost is real, so skipping the debit
    on a recorder failure would let the run silently exceed its cap.

    An exception raised by the recorder is converted into
    ``{"error": "telemetry record failed: ..."}`` rather than propagated, so a
    telemetry outage does not crash the PM dispatch cycle.
    """
    if recorder is None:
        # Imported lazily so this module has no hard dependency on the MCP layer.
        from depthfusion.mcp.tools.telemetry_tools import record_model_telemetry as recorder

    event = {
        "session_id": session_id,
        "model_id": model_id,
        "task_category": task_category,
        "tokens_in": int(tokens_in),
        "tokens_out": int(tokens_out),
        "latency_ms": int(latency_ms),
        "cost_usd": float(cost_usd),
        "quality_verdict": quality_verdict,
    }
    # project_slug is only included when the caller provided one.
    if project_slug is not None:
        event["project_slug"] = project_slug

    try:
        result = recorder(event)  # type: ignore[misc]
    except Exception as exc:  # pragma: no cover - defensive
        result = {"error": f"telemetry record failed: {exc}"}

    # Always account for the spend, even if recording it failed.
    if budget is not None:
        budget.debit(cost_usd, model_id=model_id, task=task_category)

    return result
249
+
250
+
251
+ # ---------------------------------------------------------------------------
252
+ # Budget summary (T-724, AC-6)
253
+ # ---------------------------------------------------------------------------
254
+
255
def _baseline_cost(baseline_model: str) -> float:
    """Look up ``avg_cost_usd`` for *baseline_model*.

    Uses the first ``observed`` stat row when one exists, otherwise the first
    ``prior`` row. Returns ``0.0`` when the model has neither, or when the
    chosen row has no (or a falsy) ``avg_cost_usd``.
    """
    rows = get_model_stats(model_id=baseline_model)
    for wanted in ("observed", "prior"):
        matching = [row for row in rows if row["source"] == wanted]
        if matching:
            return float(matching[0].get("avg_cost_usd") or 0.0)
    return 0.0
264
+
265
+
266
def build_budget_summary(
    *,
    cap_usd: Optional[float] = None,
    project_slug: Optional[str] = None,
    session_id: Optional[str] = None,
    baseline_model: str = DEFAULT_BASELINE_MODEL,
    telemetry_rows: Optional[list[dict[str, Any]]] = None,
) -> dict[str, Any]:
    """Summarise actual spend against a baseline model (AC-6, T-724).

    Sums ``cost_usd`` over the telemetry rows, groups the rows by
    ``model_id``, and for every dispatch accumulates the difference between
    the baseline model's average cost and the dispatch's actual cost. When
    *cap_usd* is given, ``cap_usd`` and ``remaining_usd`` are added to the
    result.

    *telemetry_rows* is injectable for testing; when it is ``None`` the rows
    are fetched with ``_query_telemetry_rows`` using *project_slug* and
    *session_id* as filters.
    """
    rows = telemetry_rows
    if rows is None:
        rows = _query_telemetry_rows(project_slug=project_slug, session_id=session_id)

    baseline_cost = _baseline_cost(baseline_model)

    total_spend = 0.0
    buckets: dict[str, dict[str, Any]] = {}
    for record in rows:
        spent = float(record.get("cost_usd") or 0.0)
        total_spend += spent

        model = record.get("model_id", "unknown")
        if model not in buckets:
            buckets[model] = {
                "model_id": model,
                "dispatches": 0,
                "spend_usd": 0.0,
                "saved_vs_baseline_usd": 0.0,
            }
        entry = buckets[model]
        entry["dispatches"] += 1
        entry["spend_usd"] += spent
        # Positive = cheaper than baseline (saved); negative = pricier (cost more).
        entry["saved_vs_baseline_usd"] += baseline_cost - spent

    # Highest spender first; the stable sort keeps first-seen order on ties.
    ranked = sorted(buckets.values(), key=lambda item: -item["spend_usd"])
    by_model = []
    for item in ranked:
        by_model.append(
            {
                "model_id": item["model_id"],
                "dispatches": item["dispatches"],
                "spend_usd": round(item["spend_usd"], 6),
                "saved_vs_baseline_usd": round(item["saved_vs_baseline_usd"], 6),
            }
        )

    report: dict[str, Any] = {
        "project_slug": project_slug,
        "session_id": session_id,
        "baseline_model": baseline_model,
        "baseline_cost_usd": baseline_cost,
        "actual_spend_usd": round(total_spend, 6),
        "dispatch_count": len(rows),
        "by_model": by_model,
    }
    if cap_usd is not None:
        cap = float(cap_usd)
        report["cap_usd"] = cap
        report["remaining_usd"] = round(cap - total_spend, 6)
    return report
326
+
327
+
328
def _query_telemetry_rows(
    *,
    project_slug: Optional[str] = None,
    session_id: Optional[str] = None,
) -> list[dict[str, Any]]:
    """Fetch ``model_telemetry`` rows, optionally filtered, oldest first.

    Each filter that is not ``None`` becomes a parameterised equality clause.
    ``schema.migrate()`` runs before the query. Any exception raised while
    connecting or querying yields an empty list instead of propagating.
    """
    from contextlib import closing

    from depthfusion.telemetry import schema

    schema.migrate()

    # (clause, bound value) pairs for the filters the caller actually supplied.
    filters: list[tuple[str, Any]] = []
    if project_slug is not None:
        filters.append(("project_slug = ?", project_slug))
    if session_id is not None:
        filters.append(("session_id = ?", session_id))

    sql = "SELECT * FROM model_telemetry"
    if filters:
        sql += " WHERE " + " AND ".join(clause for clause, _ in filters)
    sql += " ORDER BY recorded_at ASC"
    bound = [value for _, value in filters]

    try:
        with closing(schema.connect()) as conn:
            fetched = conn.execute(sql, bound).fetchall()
        # NOTE(review): dict(row) presumably relies on a mapping-style row
        # factory configured by schema.connect() — confirm.
        return [dict(row) for row in fetched]
    except Exception:  # pragma: no cover - defensive
        return []
@@ -0,0 +1,160 @@
1
+ """AnalyticsCollector — records usage events per principal (E-55).
2
+
3
+ Each call to :meth:`record_event` appends one row to ``analytics_events``
4
+ with the principal_id, event_type, and UTC timestamp.
5
+
6
+ Supported event types
7
+ ---------------------
8
+ ``search`` — a recall / search query was executed
9
+ ``ingest`` — a document batch was ingested
10
+ ``sync`` — a connector sync run completed
11
+
12
+ Unknown types are accepted and stored verbatim so callers do not need a
13
+ code change to introduce new event types; the aggregation layer will bucket
14
+ unknown types under ``other`` in summary output.
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import logging
19
+ import threading
20
+ from contextlib import closing
21
+ from datetime import datetime, timezone
22
+ from pathlib import Path
23
+
24
+ from .store import _connect, init_db
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+ #: The canonical set of event types this collector understands.
29
+ KNOWN_EVENT_TYPES: frozenset[str] = frozenset({"search", "ingest", "sync"})
30
+
31
+
32
class AnalyticsCollector:
    """Recorder of principal usage events backed by a SQLite database.

    Writes are serialised with an internal lock; every operation opens its
    own connection and closes it when done.

    Parameters
    ----------
    db_path:
        Path to the SQLite database file. It is handed to ``init_db`` on
        construction, which is expected to create it (with parent dirs) if
        it does not exist.
    """

    def __init__(self, db_path: Path) -> None:
        self._db_path = Path(db_path)
        self._lock = threading.Lock()
        init_db(self._db_path)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _to_utc_iso(moment: datetime) -> str:
        """Return *moment* as an ISO-8601 string, normalised to UTC when aware.

        Timestamps are stored and compared as text (``recorded_at >= ?``), so
        they only order correctly when they all share one UTC offset. Aware
        datetimes are therefore converted to UTC before formatting. Naive
        datetimes carry no offset to convert from and are formatted as given.
        """
        if moment.tzinfo is not None and moment.utcoffset() is not None:
            moment = moment.astimezone(timezone.utc)
        return moment.isoformat()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def record_event(
        self,
        *,
        principal_id: str,
        event_type: str,
        recorded_at: datetime | None = None,
    ) -> None:
        """Append one usage event to the database.

        Parameters
        ----------
        principal_id:
            Stable identifier for the authenticated caller (``sub`` claim).
        event_type:
            One of ``search``, ``ingest``, ``sync`` (or any custom string).
        recorded_at:
            Timestamp to record; defaults to ``datetime.now(UTC)``. Aware
            values in another timezone are converted to UTC before storage.

        Database errors are swallowed and logged — observability must never
        block serving.
        """
        if recorded_at is None:
            recorded_at = datetime.now(tz=timezone.utc)
        ts = self._to_utc_iso(recorded_at)

        try:
            with self._lock, closing(_connect(self._db_path)) as conn:
                conn.execute(
                    "INSERT INTO analytics_events (principal_id, event_type, recorded_at)"
                    " VALUES (?, ?, ?)",
                    (principal_id, event_type, ts),
                )
                conn.commit()
        except Exception:  # noqa: BLE001 — observability must not raise
            logger.exception(
                "analytics: failed to record event type=%r for principal=%r",
                event_type,
                principal_id,
            )

    def recent_events(
        self,
        *,
        principal_id: str,
        since: datetime,
        event_type: str | None = None,
    ) -> list[dict]:
        """Return raw events for *principal_id* since *since*, oldest first.

        Used by the aggregation service and in tests.

        Parameters
        ----------
        principal_id:
            Filter to this principal only.
        since:
            Lower bound (inclusive) on ``recorded_at``. Aware values are
            converted to UTC so the bound compares correctly against stored
            timestamps.
        event_type:
            Optional filter; ``None`` returns all event types.

        Returns an empty list when the query fails; the failure is logged.
        """
        since_ts = self._to_utc_iso(since)
        try:
            with closing(_connect(self._db_path)) as conn:
                if event_type is not None:
                    rows = conn.execute(
                        "SELECT principal_id, event_type, recorded_at"
                        " FROM analytics_events"
                        " WHERE principal_id = ? AND event_type = ? AND recorded_at >= ?"
                        " ORDER BY recorded_at",
                        (principal_id, event_type, since_ts),
                    ).fetchall()
                else:
                    rows = conn.execute(
                        "SELECT principal_id, event_type, recorded_at"
                        " FROM analytics_events"
                        " WHERE principal_id = ? AND recorded_at >= ?"
                        " ORDER BY recorded_at",
                        (principal_id, since_ts),
                    ).fetchall()
                # NOTE(review): dict(row) presumably relies on a mapping-style
                # row factory set up by _connect — confirm.
                return [dict(r) for r in rows]
        except Exception:  # noqa: BLE001
            logger.exception(
                "analytics: failed to query recent events for principal=%r", principal_id
            )
            return []

    def count_events(
        self,
        *,
        principal_id: str,
        event_type: str,
        since: datetime,
    ) -> int:
        """Return the count of *event_type* events for *principal_id* since *since*.

        *since* is an inclusive lower bound; aware values are converted to UTC
        before comparison. Returns ``0`` when the query fails; the failure is
        logged.
        """
        since_ts = self._to_utc_iso(since)
        try:
            with closing(_connect(self._db_path)) as conn:
                row = conn.execute(
                    "SELECT COUNT(*) FROM analytics_events"
                    " WHERE principal_id = ? AND event_type = ? AND recorded_at >= ?",
                    (principal_id, event_type, since_ts),
                ).fetchone()
                return int(row[0]) if row else 0
        except Exception:  # noqa: BLE001
            logger.exception(
                "analytics: failed to count events type=%r for principal=%r",
                event_type,
                principal_id,
            )
            return 0