devrel-origin 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. devrel_origin/__init__.py +15 -0
  2. devrel_origin/cli/__init__.py +92 -0
  3. devrel_origin/cli/_common.py +243 -0
  4. devrel_origin/cli/analytics.py +28 -0
  5. devrel_origin/cli/argus.py +497 -0
  6. devrel_origin/cli/auth.py +227 -0
  7. devrel_origin/cli/config.py +108 -0
  8. devrel_origin/cli/content.py +259 -0
  9. devrel_origin/cli/cost.py +108 -0
  10. devrel_origin/cli/cro.py +298 -0
  11. devrel_origin/cli/deliverables.py +65 -0
  12. devrel_origin/cli/docs.py +91 -0
  13. devrel_origin/cli/doctor.py +178 -0
  14. devrel_origin/cli/experiment.py +29 -0
  15. devrel_origin/cli/growth.py +97 -0
  16. devrel_origin/cli/init.py +472 -0
  17. devrel_origin/cli/intel.py +27 -0
  18. devrel_origin/cli/kb.py +96 -0
  19. devrel_origin/cli/listen.py +31 -0
  20. devrel_origin/cli/marketing.py +66 -0
  21. devrel_origin/cli/migrate.py +45 -0
  22. devrel_origin/cli/run.py +46 -0
  23. devrel_origin/cli/sales.py +57 -0
  24. devrel_origin/cli/schedule.py +62 -0
  25. devrel_origin/cli/synthesize.py +28 -0
  26. devrel_origin/cli/triage.py +29 -0
  27. devrel_origin/cli/video.py +35 -0
  28. devrel_origin/core/__init__.py +58 -0
  29. devrel_origin/core/agent_config.py +75 -0
  30. devrel_origin/core/argus.py +964 -0
  31. devrel_origin/core/atlas.py +1450 -0
  32. devrel_origin/core/base.py +372 -0
  33. devrel_origin/core/cyra.py +563 -0
  34. devrel_origin/core/dex.py +708 -0
  35. devrel_origin/core/echo.py +614 -0
  36. devrel_origin/core/growth/__init__.py +27 -0
  37. devrel_origin/core/growth/recommendations.py +219 -0
  38. devrel_origin/core/growth/target_kinds.py +51 -0
  39. devrel_origin/core/iris.py +513 -0
  40. devrel_origin/core/kai.py +1367 -0
  41. devrel_origin/core/llm.py +542 -0
  42. devrel_origin/core/llm_backends.py +274 -0
  43. devrel_origin/core/mox.py +514 -0
  44. devrel_origin/core/nova.py +349 -0
  45. devrel_origin/core/pax.py +1205 -0
  46. devrel_origin/core/rex.py +532 -0
  47. devrel_origin/core/sage.py +486 -0
  48. devrel_origin/core/sentinel.py +385 -0
  49. devrel_origin/core/types.py +98 -0
  50. devrel_origin/core/video/__init__.py +22 -0
  51. devrel_origin/core/video/assembler.py +131 -0
  52. devrel_origin/core/video/browser_recorder.py +118 -0
  53. devrel_origin/core/video/desktop_recorder.py +254 -0
  54. devrel_origin/core/video/overlay_renderer.py +143 -0
  55. devrel_origin/core/video/script_parser.py +147 -0
  56. devrel_origin/core/video/tts_engine.py +82 -0
  57. devrel_origin/core/vox.py +268 -0
  58. devrel_origin/core/watchdog.py +321 -0
  59. devrel_origin/project/__init__.py +1 -0
  60. devrel_origin/project/config.py +75 -0
  61. devrel_origin/project/cost_sink.py +61 -0
  62. devrel_origin/project/init.py +104 -0
  63. devrel_origin/project/paths.py +75 -0
  64. devrel_origin/project/state.py +241 -0
  65. devrel_origin/project/templates/__init__.py +4 -0
  66. devrel_origin/project/templates/config.toml +24 -0
  67. devrel_origin/project/templates/devrel.gitignore +10 -0
  68. devrel_origin/project/templates/slop-blocklist.md +45 -0
  69. devrel_origin/project/templates/style.md +24 -0
  70. devrel_origin/project/templates/voice.md +29 -0
  71. devrel_origin/quality/__init__.py +66 -0
  72. devrel_origin/quality/editorial.py +357 -0
  73. devrel_origin/quality/persona.py +84 -0
  74. devrel_origin/quality/readability.py +148 -0
  75. devrel_origin/quality/slop.py +167 -0
  76. devrel_origin/quality/style.py +110 -0
  77. devrel_origin/quality/voice.py +15 -0
  78. devrel_origin/tools/__init__.py +9 -0
  79. devrel_origin/tools/analytics.py +304 -0
  80. devrel_origin/tools/api_client.py +393 -0
  81. devrel_origin/tools/apollo_client.py +305 -0
  82. devrel_origin/tools/code_validator.py +428 -0
  83. devrel_origin/tools/github_tools.py +297 -0
  84. devrel_origin/tools/instantly_client.py +412 -0
  85. devrel_origin/tools/kb_harvester.py +340 -0
  86. devrel_origin/tools/mcp_server.py +578 -0
  87. devrel_origin/tools/notifications.py +245 -0
  88. devrel_origin/tools/run_report.py +193 -0
  89. devrel_origin/tools/scheduler.py +231 -0
  90. devrel_origin/tools/search_tools.py +321 -0
  91. devrel_origin/tools/self_improve.py +168 -0
  92. devrel_origin/tools/sheets.py +236 -0
  93. devrel_origin-0.2.14.dist-info/METADATA +354 -0
  94. devrel_origin-0.2.14.dist-info/RECORD +98 -0
  95. devrel_origin-0.2.14.dist-info/WHEEL +5 -0
  96. devrel_origin-0.2.14.dist-info/entry_points.txt +2 -0
  97. devrel_origin-0.2.14.dist-info/licenses/LICENSE +21 -0
  98. devrel_origin-0.2.14.dist-info/top_level.txt +1 -0
@@ -0,0 +1,964 @@
1
+ """
2
+ Argus — Content Performance Analyst Agent.
3
+
4
+ Pulls post-publish performance data from PostHog, GitHub, Instantly, and
5
+ Echo's social_mentions table; ranks content deterministically; and emits
6
+ structured optimization recommendations via a single Sonnet call.
7
+
8
+ Sits beside Watchdog (infra) and Sentinel (pre-publish) as the
9
+ post-publish watcher in the 13-agent pantheon.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import asyncio
15
+ import json
16
+ import logging
17
+ import sqlite3
18
+ import statistics
19
+ from dataclasses import dataclass, field
20
+ from datetime import datetime
21
+ from pathlib import Path
22
+ from typing import Any, Literal, Optional
23
+
24
+ from devrel_origin.core.base import load_agent_prompt, strip_markdown_fences
25
+ from devrel_origin.core.growth import Pillar, TargetKind
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
# Metrics whose |z-score| within their content_type peer group exceeds this
# threshold are flagged as anomalies by _score_metrics.
_ANOMALY_Z_THRESHOLD = 2.5

# Closed vocabularies used throughout this module (dataclasses, rendering,
# and persistence all key off these literal values).
ContentType = Literal["blog", "landing", "social", "email", "repo", "video"]
RecAction = Literal[
    "double_down",
    "retire",
    "rewrite",
    "retest",
    "amplify",
    "investigate",
]
TargetType = Literal["content", "theme", "channel"]
41
+
42
+
43
@dataclass
class PerformanceMetric:
    """Point-in-time performance snapshot for one piece of content.

    The raw fields come from the collectors; ``percentile``, ``wow_delta``
    and ``anomaly_flag`` are filled in later by the deterministic scoring
    pass (``_score_metrics``), which returns new instances.
    """

    content_id: str
    content_type: ContentType
    title: str
    url: str | None
    published_at: datetime
    primary_metric: float  # the single number this piece is ranked by
    metric_name: str  # label for primary_metric, shown in reports
    secondary_metrics: dict[str, float] = field(default_factory=dict)
    percentile: float | None = None  # 0..100 rank among content_type peers
    wow_delta: float | None = None  # % change vs prior-period baseline
    anomaly_flag: bool = False  # |z| above _ANOMALY_Z_THRESHOLD in peer group
58
+
59
+
60
@dataclass
class Recommendation:
    """A single optimization suggestion aimed at one target.

    ``source_ids`` lists the ``content_id`` values that back the
    recommendation. v1 only displays them; v2 (closed-loop routing) lets
    Iris/Mox/Nova resolve the rec to actionable artifacts without
    re-parsing the free-text ``target``.
    """

    action: RecAction
    target: str
    target_type: TargetType
    rationale: str
    evidence: list[str]
    confidence: float
    source_ids: list[str] = field(default_factory=list)
    # Stamped by _persist_sync as an ISO timestamp; None until persisted.
    first_seen_period: str | None = None
78
+
79
+
80
@dataclass
class PerformanceReport:
    """Complete output of one Argus run.

    Persisted to .devrel/state.db by ``_persist_sync`` and rendered to
    markdown for humans via ``to_markdown``.
    """

    period_start: datetime
    period_end: datetime
    top_performers: list[PerformanceMetric]
    bottom_performers: list[PerformanceMetric]
    trend_signals: list[str]
    recommendations: list[Recommendation]
    sources_ok: dict[str, bool]  # collector name -> collected without error?
    insufficient_data: bool = False  # True when no source produced metrics
    llm_error: str | None = None  # set when the recommendation LLM call failed

    def to_json(self) -> dict:
        """Return a JSON-serializable dict form of this report."""
        return _report_to_jsonable(self)

    def to_markdown(self) -> str:
        """Return a human-readable markdown rendering of this report."""
        return _render_markdown(self)
99
+
100
+
101
+ def _metric_to_jsonable(m: PerformanceMetric) -> dict:
102
+ return {
103
+ "content_id": m.content_id,
104
+ "content_type": m.content_type,
105
+ "title": m.title,
106
+ "url": m.url,
107
+ "published_at": m.published_at.isoformat(),
108
+ "primary_metric": m.primary_metric,
109
+ "metric_name": m.metric_name,
110
+ "secondary_metrics": dict(m.secondary_metrics),
111
+ "percentile": m.percentile,
112
+ "wow_delta": m.wow_delta,
113
+ "anomaly_flag": m.anomaly_flag,
114
+ }
115
+
116
+
117
+ def _rec_to_jsonable(r: Recommendation) -> dict:
118
+ return {
119
+ "action": r.action,
120
+ "target": r.target,
121
+ "target_type": r.target_type,
122
+ "rationale": r.rationale,
123
+ "evidence": list(r.evidence),
124
+ "confidence": r.confidence,
125
+ "source_ids": list(r.source_ids),
126
+ "first_seen_period": r.first_seen_period,
127
+ }
128
+
129
+
130
+ def _report_to_jsonable(r: PerformanceReport) -> dict:
131
+ return {
132
+ "period_start": r.period_start.isoformat(),
133
+ "period_end": r.period_end.isoformat(),
134
+ "top_performers": [_metric_to_jsonable(m) for m in r.top_performers],
135
+ "bottom_performers": [_metric_to_jsonable(m) for m in r.bottom_performers],
136
+ "trend_signals": list(r.trend_signals),
137
+ "recommendations": [_rec_to_jsonable(rec) for rec in r.recommendations],
138
+ "sources_ok": dict(r.sources_ok),
139
+ "insufficient_data": r.insufficient_data,
140
+ "llm_error": r.llm_error,
141
+ }
142
+
143
+
144
# Render order for the per-action recommendation sections in the markdown
# report (see _render_markdown).
_REC_ACTION_ORDER: tuple[str, ...] = (
    "double_down",
    "amplify",
    "rewrite",
    "retest",
    "retire",
    "investigate",
)

# Recommendations that warrant a downstream content brief (a Mox/Kai-ready
# prompt staged on disk). Excluded: retire/investigate (not actionable as
# a content task) and retest (Nova's domain, separate artifact).
_BRIEF_ACTIONS: frozenset[str] = frozenset({"double_down", "amplify", "rewrite"})
157
+
158
+
159
+ def _action_to_brief_intent(action: str) -> str:
160
+ return {
161
+ "double_down": "Produce a new piece in the same theme/format.",
162
+ "amplify": "Re-distribute this piece on additional channels (social, email).",
163
+ "rewrite": "Rewrite this piece with a stronger hook, clearer CTA, and tighter structure.",
164
+ }.get(action, "Take action on the recommendation below.")
165
+
166
+
167
def compute_calibration(state_db_path: Path) -> dict:
    """Score how well past recommendations actually panned out.

    For each historical recommendation that has at least one metric_history
    observation strictly after its first_seen_period, decide whether the
    action's prediction held. Currently scores only ``double_down`` and
    ``retire`` (the actions with a clear post-hoc test). Other actions are
    counted as "unscored".

    Returns::

        {
            "scored_recs": int,
            "unscored_recs": int,
            "by_action": {
                "double_down": {"n": int, "panned_out": int, "rate": float,
                                "avg_confidence": float, "calibrated_lift": float},
                ...
            },
            "high_conf_rate": float | None,  # rate for recs with conf >= 0.8
            "low_conf_rate": float | None,   # rate for recs with conf < 0.5
        }

    "calibrated_lift" is the rate minus 0.5 (the chance baseline) — positive
    means Argus's recs in this action are better than coin-flip. A negative
    value means the action class is consistently wrong; treat with suspicion.
    """
    if not state_db_path.is_file():
        return {"scored_recs": 0, "unscored_recs": 0, "by_action": {}}

    with sqlite3.connect(state_db_path) as conn:
        conn.row_factory = sqlite3.Row
        try:
            recs = conn.execute(
                "SELECT id, action, target, source_ids_json, confidence, "
                "first_seen_period FROM analytics_recommendations"
            ).fetchall()
        except sqlite3.OperationalError:
            # Table doesn't exist yet (fresh DB) — nothing to calibrate.
            return {"scored_recs": 0, "unscored_recs": 0, "by_action": {}}

        results: dict[str, dict] = {}
        scored = 0
        unscored = 0
        high_conf_hits = 0
        high_conf_total = 0
        low_conf_hits = 0
        low_conf_total = 0

        for r in recs:
            action = r["action"]
            confidence = float(r["confidence"])
            first_seen = r["first_seen_period"]
            source_ids = json.loads(r["source_ids_json"] or "[]")
            # Only double_down/retire have a post-hoc testable prediction,
            # and we need source content ids to look up observations.
            if not source_ids or action not in {"double_down", "retire"}:
                unscored += 1
                continue

            # Pull post-period observations for source content
            placeholders = ",".join("?" for _ in source_ids)
            obs = conn.execute(
                f"SELECT content_id, period_end, primary_metric "
                f"FROM metric_history "
                f"WHERE content_id IN ({placeholders}) AND period_end > ?",
                (*source_ids, first_seen),
            ).fetchall()
            if not obs:
                unscored += 1
                continue

            # Anchor: each source content's metric AT first_seen
            anchors = {
                row["content_id"]: float(row["primary_metric"])
                for row in conn.execute(
                    f"SELECT content_id, primary_metric FROM metric_history "
                    f"WHERE content_id IN ({placeholders}) AND period_end = ?",
                    (*source_ids, first_seen),
                ).fetchall()
            }
            if not anchors:
                unscored += 1
                continue

            # Decision rule
            # double_down: prediction holds if subsequent avg >= 0.9 * anchor
            # retire: prediction holds if subsequent max <= 1.1 * anchor (didn't recover)
            held = _decide_panned_out(action, anchors, obs)

            scored += 1
            bucket = results.setdefault(
                action,
                {"n": 0, "panned_out": 0, "_conf_sum": 0.0},
            )
            bucket["n"] += 1
            bucket["_conf_sum"] += confidence
            if held:
                bucket["panned_out"] += 1
                # hits only count when the prediction held...
                if confidence >= 0.8:
                    high_conf_hits += 1
                if confidence < 0.5:
                    low_conf_hits += 1
            # ...but totals count every scored rec in the confidence band.
            if confidence >= 0.8:
                high_conf_total += 1
            if confidence < 0.5:
                low_conf_total += 1

    # Fold the per-action tallies into the public shape.
    by_action: dict[str, dict] = {}
    for action, b in results.items():
        rate = b["panned_out"] / b["n"] if b["n"] else 0.0
        by_action[action] = {
            "n": b["n"],
            "panned_out": b["panned_out"],
            "rate": round(rate, 3),
            "avg_confidence": round(b["_conf_sum"] / b["n"], 3) if b["n"] else 0.0,
            "calibrated_lift": round(rate - 0.5, 3),
        }
    return {
        "scored_recs": scored,
        "unscored_recs": unscored,
        "by_action": by_action,
        "high_conf_rate": (round(high_conf_hits / high_conf_total, 3) if high_conf_total else None),
        "low_conf_rate": (round(low_conf_hits / low_conf_total, 3) if low_conf_total else None),
    }
289
+
290
+
291
+ def _decide_panned_out(
292
+ action: str,
293
+ anchors: dict[str, float],
294
+ obs: list,
295
+ ) -> bool:
296
+ """Did the action's prediction hold for these source content observations?
297
+
298
+ For each source content_id, average its primary_metric across all
299
+ post-anchor observations. Then aggregate across source ids:
300
+
301
+ - ``double_down``: held if AVG(post_avg / anchor) >= 0.9 (held steady or grew)
302
+ - ``retire``: held if AVG(post_avg / anchor) <= 1.1 (did not recover)
303
+ """
304
+ by_content: dict[str, list[float]] = {}
305
+ for row in obs:
306
+ by_content.setdefault(row["content_id"], []).append(float(row["primary_metric"]))
307
+
308
+ ratios: list[float] = []
309
+ for cid, vals in by_content.items():
310
+ anchor = anchors.get(cid, 0.0)
311
+ if anchor <= 0:
312
+ continue
313
+ avg = sum(vals) / len(vals)
314
+ ratios.append(avg / anchor)
315
+ if not ratios:
316
+ return False
317
+ overall = sum(ratios) / len(ratios)
318
+ if action == "double_down":
319
+ return overall >= 0.9
320
+ if action == "retire":
321
+ return overall <= 1.1
322
+ return False
323
+
324
+
325
def write_recommendation_briefs(
    report: PerformanceReport,
    briefs_dir: Path,
) -> list[Path]:
    """For each actionable recommendation in ``report``, stage a Mox-ready
    content brief on disk.

    The brief is intentionally human-reviewable, not auto-executed: it gives
    a one-click handoff between Argus's recommendation and Mox's content
    pipeline. Recommendations with action ∈ {retire, investigate, retest}
    are skipped (not content tasks).

    Returns the list of file paths written.
    """
    briefs_dir.mkdir(parents=True, exist_ok=True)
    paths: list[Path] = []
    period = report.period_end.date().isoformat()
    for rec in report.recommendations:
        # Safety cap so a runaway LLM can't fill the disk. Counts briefs
        # actually written (not recommendations iterated) and is checked
        # before writing, so at most 100 files are ever produced.
        if len(paths) >= 100:
            break
        if rec.action not in _BRIEF_ACTIONS:
            continue
        # Slugify target for filename; keep readable
        slug = _slugify_target(rec.target)
        out = briefs_dir / f"argus-brief-{period}-{rec.action}-{slug}.md"
        out.write_text(_render_brief(rec, period), encoding="utf-8")
        paths.append(out)
    return paths
353
+
354
+
355
+ def _slugify_target(target: str) -> str:
356
+ """Turn 'theme:python-testing' or 'blog/cli-launch' into a safe filename slug."""
357
+ import re as _re
358
+
359
+ return _re.sub(r"[^a-z0-9]+", "-", target.lower()).strip("-")[:60] or "rec"
360
+
361
+
362
def _render_brief(rec: Recommendation, period: str) -> str:
    """Render one recommendation as a standalone markdown brief for Mox/Kai."""
    intent = _action_to_brief_intent(rec.action)
    out: list[str] = [
        f"# Argus brief — {rec.action}: `{rec.target}`",
        "",
        f"**Period:** {period}",
        f"**Action:** `{rec.action}` ({rec.target_type})",
        f"**Confidence:** {rec.confidence:.2f}",
        "",
        "## Intent",
        intent,
        "",
        "## Why",
        rec.rationale,
        "",
    ]
    if rec.evidence:
        out.append("## Evidence")
        out.extend(f"- {ev}" for ev in rec.evidence)
        out.append("")
    if rec.source_ids:
        out.append("## Source content")
        out.extend(f"- `{sid}`" for sid in rec.source_ids)
        out.append("")
    out.append("## Next step")
    out.append(
        "Hand this brief to Mox or Kai. The content pipeline can consume the "
        "intent + evidence directly:"
    )
    out.append("")
    out.append("```bash")
    # Per-action suggested command; retest/retire/investigate never reach here.
    if rec.action == "double_down":
        out.append(f"devrel content draft '{rec.target} — follow-up post' --type tutorial")
    elif rec.action == "rewrite":
        out.append("devrel content audit deliverables/<file> # then redraft based on findings")
    elif rec.action == "amplify":
        out.append(f"devrel marketing social '{rec.target}' --channels reddit,hn,twitter")
    out.append("```")
    return "\n".join(out) + "\n"
402
+
403
+
404
def _render_markdown(report: PerformanceReport) -> str:
    """Render a PerformanceReport as a human-readable markdown document.

    Sections: source health, top/bottom performers, trend signals, and
    recommendations grouped by action in ``_REC_ACTION_ORDER``.
    Recommendations first seen >= 2 weeks before period_end are tagged
    ``[STALE Nw]``.
    """
    lines: list[str] = []
    start = report.period_start.date().isoformat()
    end = report.period_end.date().isoformat()
    lines.append(f"# Argus Performance Report — {start} to {end}")
    lines.append("")

    lines.append("## Source health")
    for source, ok in report.sources_ok.items():
        lines.append(f"- {source}: {'ok' if ok else 'failed'}")
    # The LLM is reported like a source, but only when it actually failed.
    if report.llm_error:
        lines.append(f"- llm: failed ({report.llm_error})")
    if report.insufficient_data:
        lines.append("")
        lines.append("> **Insufficient data** — too little signal for trustworthy recommendations.")
    lines.append("")

    lines.append("## Top performers")
    if not report.top_performers:
        lines.append("_None this period._")
    for m in report.top_performers:
        # percentile may be None for unscored metrics — render as "p?"
        pct = f"p{m.percentile:.0f}" if m.percentile is not None else "p?"
        lines.append(
            f"- **{m.content_id}** ({m.content_type}) — "
            f"{m.primary_metric:g} {m.metric_name} ({pct})"
        )
    lines.append("")

    lines.append("## Bottom performers")
    if not report.bottom_performers:
        lines.append("_None this period._")
    for m in report.bottom_performers:
        pct = f"p{m.percentile:.0f}" if m.percentile is not None else "p?"
        lines.append(
            f"- **{m.content_id}** ({m.content_type}) — "
            f"{m.primary_metric:g} {m.metric_name} ({pct})"
        )
    lines.append("")

    lines.append("## Trend signals")
    if not report.trend_signals:
        lines.append("_None._")
    for sig in report.trend_signals:
        lines.append(f"- {sig}")
    lines.append("")

    lines.append("## Recommendations")
    if not report.recommendations:
        lines.append("_No recommendations this period._")
    else:
        # Group by action, then emit groups in canonical order.
        grouped: dict[str, list[Recommendation]] = {}
        for r in report.recommendations:
            grouped.setdefault(r.action, []).append(r)
        for action in _REC_ACTION_ORDER:
            bucket = grouped.get(action, [])
            if not bucket:
                continue
            lines.append(f"### {action} ({len(bucket)})")
            for r in bucket:
                stale_tag = ""
                if r.first_seen_period:
                    try:
                        # Normalize trailing 'Z' so fromisoformat accepts it.
                        first = datetime.fromisoformat(r.first_seen_period.replace("Z", "+00:00"))
                        weeks = (report.period_end - first).days // 7
                        if weeks >= 2:
                            stale_tag = f" [STALE {weeks}w]"
                    except (ValueError, TypeError):
                        # Unparseable timestamp or naive/aware mismatch:
                        # silently omit the staleness tag.
                        pass
                lines.append(
                    f"- **{r.target}** (conf {r.confidence:.2f}){stale_tag} — {r.rationale}"
                )
                if r.source_ids:
                    lines.append(f"  - sources: {', '.join(r.source_ids)}")
                for ev in r.evidence:
                    lines.append(f"  - evidence: {ev}")
            lines.append("")
    return "\n".join(lines).rstrip() + "\n"
481
+
482
+
483
def _score_metrics(
    metrics: list[PerformanceMetric],
    *,
    baseline_by_id: dict[str, float],
) -> list[PerformanceMetric]:
    """Annotate each metric with percentile, wow_delta, and anomaly_flag.

    Pure function — input metrics are not mutated; new instances are returned.

    - percentile: rank within same content_type peers (0..100, 100 = best)
    - wow_delta: % change vs baseline_by_id[content_id], None if no baseline
    - anomaly_flag: |z-score| > _ANOMALY_Z_THRESHOLD against group mean/stdev
    """
    groups: dict[str, list[PerformanceMetric]] = {}
    for metric in metrics:
        groups.setdefault(metric.content_type, []).append(metric)

    scored: list[PerformanceMetric] = []
    for peers in groups.values():
        values = [p.primary_metric for p in peers]
        count = len(values)
        group_mean = statistics.fmean(values) if values else 0.0
        group_stdev = statistics.pstdev(values) if count > 1 else 0.0

        for metric in peers:
            # Percentile: fraction of peers strictly below, on a 0..100 scale.
            if count <= 1:
                percentile = 100.0
            else:
                below = sum(1 for v in values if v < metric.primary_metric)
                percentile = (below / (count - 1)) * 100.0

            # Week-over-week delta vs this content's prior-period value.
            prior = baseline_by_id.get(metric.content_id)
            if prior is None or prior == 0:
                wow = None
            else:
                wow = ((metric.primary_metric - prior) / prior) * 100.0

            # Anomaly: z-score against the peer group (needs nonzero spread).
            is_anomaly = False
            if group_stdev > 0:
                z_score = (metric.primary_metric - group_mean) / group_stdev
                is_anomaly = abs(z_score) > _ANOMALY_Z_THRESHOLD

            scored.append(
                PerformanceMetric(
                    content_id=metric.content_id,
                    content_type=metric.content_type,
                    title=metric.title,
                    url=metric.url,
                    published_at=metric.published_at,
                    primary_metric=metric.primary_metric,
                    metric_name=metric.metric_name,
                    secondary_metrics=dict(metric.secondary_metrics),
                    percentile=round(percentile, 2),
                    wow_delta=round(wow, 2) if wow is not None else None,
                    anomaly_flag=is_anomaly,
                )
            )
    return scored
541
+
542
+
543
+ class Argus:
544
+ """Content performance analyst.
545
+
546
+ Orchestrates four collectors in parallel, scores metrics deterministically,
547
+ and asks a Sonnet LLM to generate structured Recommendation objects from
548
+ the ranked leaderboard. Per-collector failures are isolated and surfaced in
549
+ PerformanceReport.sources_ok rather than aborting the whole report.
550
+ """
551
+
552
+ def __init__(
553
+ self,
554
+ posthog_collector,
555
+ github_collector,
556
+ instantly_collector,
557
+ social_collector,
558
+ llm_client: Optional[Any] = None,
559
+ state_db_path: Optional[Path] = None,
560
+ ):
561
+ self._collectors = {
562
+ "posthog": posthog_collector,
563
+ "github": github_collector,
564
+ "instantly": instantly_collector,
565
+ "social": social_collector,
566
+ }
567
+ self.llm_client = llm_client
568
+ self.state_db_path = state_db_path
569
+ self._system_prompt = load_agent_prompt(
570
+ "argus",
571
+ "system_prompt.txt",
572
+ self._DEFAULT_SYSTEM_PROMPT,
573
+ )
574
+
575
    async def run(
        self,
        period_start: datetime,
        period_end: datetime,
    ) -> PerformanceReport:
        """Pull, score, recommend, persist. Returns the PerformanceReport.

        Steps:
          1. Gather metrics from all collectors in parallel (``_gather``).
          2. If nothing came back, short-circuit with insufficient_data=True.
          3. Score metrics against prior-period baselines (``_score_metrics``).
          4. If an LLM client is configured, generate recommendations; an LLM
             failure is recorded in ``llm_error`` rather than raised.
          5. Persist to the state DB when ``state_db_path`` is set.
        """
        period = (period_start, period_end)
        all_metrics, sources_ok = await self._gather(period)

        # No source produced anything — emit an explicit "insufficient data"
        # report instead of empty rankings.
        if not all_metrics:
            logger.info(
                "argus.run: insufficient_data — no metrics from any source",
                extra={
                    "period_start": period_start.isoformat(),
                    "period_end": period_end.isoformat(),
                    "sources_ok": sources_ok,
                },
            )
            return PerformanceReport(
                period_start=period_start,
                period_end=period_end,
                top_performers=[],
                bottom_performers=[],
                trend_signals=[],
                recommendations=[],
                sources_ok=sources_ok,
                insufficient_data=True,
            )

        # Baselines power week-over-week deltas; empty when there's no DB.
        baseline = await self._load_baselines() if self.state_db_path else {}
        logger.info(
            "argus.baselines_loaded",
            extra={"baseline_count": len(baseline)},
        )
        scored = _score_metrics(all_metrics, baseline_by_id=baseline)
        anomaly_count = sum(1 for m in scored if m.anomaly_flag)
        logger.info(
            "argus.scored",
            extra={
                "scored_count": len(scored),
                "anomaly_count": anomaly_count,
                "content_types": sorted({m.content_type for m in scored}),
            },
        )

        top, bottom = self._top_bottom(scored)

        recs: list[Recommendation] = []
        trend_signals: list[str] = []
        llm_error: Optional[str] = None
        if self.llm_client:
            try:
                recs, trend_signals = await self._generate_recommendations(scored)
            except Exception as exc:  # noqa: BLE001
                # LLM failure degrades the report (no recs) but never aborts it.
                logger.warning("Argus LLM step failed: %s", exc)
                llm_error = str(exc)
            logger.info(
                "argus.recommendations_generated",
                extra={
                    "recs_count": len(recs),
                    "trend_signals_count": len(trend_signals),
                    "llm_error": llm_error,
                },
            )

        report = PerformanceReport(
            period_start=period_start,
            period_end=period_end,
            top_performers=top,
            bottom_performers=bottom,
            trend_signals=trend_signals,
            recommendations=recs,
            sources_ok=sources_ok,
            llm_error=llm_error,
        )

        if self.state_db_path:
            await self._persist(report, scored)
            logger.info(
                "argus.persisted",
                extra={
                    "period_end": period_end.isoformat(),
                    "metric_history_rows": len(scored),
                },
            )

        return report
662
+
663
+ async def _gather(
664
+ self,
665
+ period: tuple[datetime, datetime],
666
+ ) -> tuple[list[PerformanceMetric], dict[str, bool]]:
667
+ """Run all four collectors in parallel; isolate per-source failures."""
668
+ names = list(self._collectors.keys())
669
+ coros = [c.collect(period) for c in self._collectors.values()]
670
+ results = await asyncio.gather(*coros, return_exceptions=True)
671
+
672
+ all_metrics: list[PerformanceMetric] = []
673
+ sources_ok: dict[str, bool] = {}
674
+ for name, result in zip(names, results, strict=True):
675
+ if isinstance(result, Exception):
676
+ sources_ok[name] = False
677
+ logger.warning("Argus collector %s raised: %s", name, result)
678
+ else:
679
+ sources_ok[name] = True
680
+ all_metrics.extend(result)
681
+ logger.info(
682
+ "argus.gather_complete",
683
+ extra={
684
+ "ok_sources": sorted(k for k, v in sources_ok.items() if v),
685
+ "failed_sources": sorted(k for k, v in sources_ok.items() if not v),
686
+ "total_metrics": len(all_metrics),
687
+ },
688
+ )
689
+ return all_metrics, sources_ok
690
+
691
+ @staticmethod
692
+ def _top_bottom(
693
+ scored: list[PerformanceMetric],
694
+ ) -> tuple[list[PerformanceMetric], list[PerformanceMetric]]:
695
+ """Top 5 and bottom 3 per content_type, flattened."""
696
+ by_type: dict[str, list[PerformanceMetric]] = {}
697
+ for m in scored:
698
+ by_type.setdefault(m.content_type, []).append(m)
699
+ top: list[PerformanceMetric] = []
700
+ bottom: list[PerformanceMetric] = []
701
+ for group in by_type.values():
702
+ ranked = sorted(group, key=lambda m: m.primary_metric, reverse=True)
703
+ top.extend(ranked[:5])
704
+ bottom.extend(list(reversed(ranked[-3:])))
705
+ return top, bottom
706
+
707
+ async def _load_baselines(self) -> dict[str, float]:
708
+ """Async wrapper that delegates SQLite read to a thread."""
709
+ if not self.state_db_path or not self.state_db_path.is_file():
710
+ return {}
711
+ return await asyncio.to_thread(self._load_baselines_sync)
712
+
713
    def _load_baselines_sync(self) -> dict[str, float]:
        """Read the most recent prior period's primary_metric per content_id.

        Used by ``_score_metrics`` for week-over-week deltas. Reads from the
        indexed ``metric_history`` table when available (single SELECT, no
        JSON deserialization). Falls back to the legacy ``all_primary`` blob
        and then to top/bottom slices for reports written before either
        existed. Returns {} when the DB has no prior data.
        """
        try:
            with sqlite3.connect(self.state_db_path) as conn:
                conn.row_factory = sqlite3.Row
                # Prefer indexed metric_history. Pick the most recent period
                # and pull all content_id rows from it.
                latest = conn.execute("SELECT MAX(period_end) AS p FROM metric_history").fetchone()
                if latest and latest["p"]:
                    rows = conn.execute(
                        "SELECT content_id, primary_metric FROM metric_history "
                        "WHERE period_end = ?",
                        (latest["p"],),
                    ).fetchall()
                    if rows:
                        return {r["content_id"]: float(r["primary_metric"]) for r in rows}

                # Fallback: legacy blob in analytics_reports.
                row = conn.execute(
                    "SELECT report_json FROM analytics_reports ORDER BY period_end DESC LIMIT 1"
                ).fetchone()
        except sqlite3.OperationalError:
            # Missing table(s) — a fresh DB has no prior data.
            return {}
        if not row:
            return {}
        try:
            data = json.loads(row["report_json"])
        except json.JSONDecodeError:
            # Corrupt archive row — treat as no prior data.
            return {}
        # Preferred legacy shape: the full all_primary map (every content_id).
        all_primary = data.get("all_primary")
        if isinstance(all_primary, dict) and all_primary:
            return {cid: float(v) for cid, v in all_primary.items()}
        # Last resort: reconstruct partial baselines from the top/bottom slices
        # of reports written before all_primary existed.
        baseline: dict[str, float] = {}
        for section in ("top_performers", "bottom_performers"):
            for entry in data.get(section, []):
                cid = entry.get("content_id")
                if cid:
                    baseline[cid] = float(entry.get("primary_metric", 0.0))
        return baseline
759
+
760
+ async def _persist(
761
+ self,
762
+ report: PerformanceReport,
763
+ all_metrics: list[PerformanceMetric],
764
+ ) -> None:
765
+ """Async wrapper that delegates the SQLite write to a thread."""
766
+ if not self.state_db_path:
767
+ return
768
+ await asyncio.to_thread(self._persist_sync, report, all_metrics)
769
+
770
+ def _persist_sync(
771
+ self,
772
+ report: PerformanceReport,
773
+ all_metrics: list[PerformanceMetric],
774
+ ) -> None:
775
+ """Serialize the full report to three tables in one transaction:
776
+
777
+ - ``analytics_reports``: human-readable JSON archive
778
+ - ``metric_history``: indexed (content_id, period_end) time-series for
779
+ baseline lookups
780
+ - ``analytics_recommendations``: per-rec rows for v2 routing
781
+ (queryable by action/target without parsing the report blob)
782
+
783
+ Lifecycle: when (action, target) re-emerges from a prior report,
784
+ ``first_seen_period`` carries over the earliest value so 'staleness'
785
+ accumulates across runs.
786
+ """
787
+ payload = report.to_json()
788
+ payload["all_primary"] = {m.content_id: m.primary_metric for m in all_metrics}
789
+ period_end_iso = report.period_end.isoformat()
790
+ with sqlite3.connect(self.state_db_path) as conn:
791
+ conn.row_factory = sqlite3.Row
792
+ cur = conn.execute(
793
+ "INSERT INTO analytics_reports "
794
+ "(period_start, period_end, report_json) VALUES (?, ?, ?)",
795
+ (
796
+ report.period_start.isoformat(),
797
+ period_end_iso,
798
+ json.dumps(payload),
799
+ ),
800
+ )
801
+ report_id = cur.lastrowid
802
+ conn.executemany(
803
+ "INSERT OR REPLACE INTO metric_history "
804
+ "(content_id, period_end, primary_metric, metric_name, content_type) "
805
+ "VALUES (?, ?, ?, ?, ?)",
806
+ [
807
+ (m.content_id, period_end_iso, m.primary_metric, m.metric_name, m.content_type)
808
+ for m in all_metrics
809
+ ],
810
+ )
811
+ if report.recommendations:
812
+ # Lifecycle: if (action, target) was seen in a prior report,
813
+ # carry over the earliest first_seen_period so 'staleness'
814
+ # accumulates across runs.
815
+ rec_rows: list[tuple] = []
816
+ for r in report.recommendations:
817
+ prior = conn.execute(
818
+ "SELECT MIN(first_seen_period) AS first FROM analytics_recommendations "
819
+ "WHERE pillar = ? AND action = ? AND target = ?",
820
+ (Pillar.ARGUS.value, r.action, r.target),
821
+ ).fetchone()
822
+ first_seen = prior["first"] if prior and prior["first"] else period_end_iso
823
+ # Stamp on the in-memory rec too so to_json/to_markdown see it
824
+ r.first_seen_period = first_seen
825
+ rec_rows.append(
826
+ (
827
+ report_id,
828
+ period_end_iso,
829
+ r.action,
830
+ r.target,
831
+ r.target_type,
832
+ r.rationale,
833
+ r.confidence,
834
+ json.dumps(list(r.source_ids)),
835
+ json.dumps(list(r.evidence)),
836
+ first_seen,
837
+ Pillar.ARGUS.value,
838
+ TargetKind.CONTENT_ID.value,
839
+ )
840
+ )
841
+ conn.executemany(
842
+ "INSERT INTO analytics_recommendations "
843
+ "(report_id, period_end, action, target, target_type, "
844
+ "rationale, confidence, source_ids_json, evidence_json, "
845
+ "first_seen_period, pillar, target_kind) "
846
+ "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
847
+ rec_rows,
848
+ )
849
+ conn.commit()
850
+
851
    # Default system prompt for the recommendation-generation LLM call.
    # The closed action vocabulary here must stay in sync with the actions the
    # user prompt in _generate_recommendations enumerates, and with downstream
    # consumers of recommendation rows. Served to callers through the
    # SYSTEM_PROMPT property, which returns self._system_prompt (presumably
    # seeded from this constant during init — confirm in __init__).
    _DEFAULT_SYSTEM_PROMPT = """You are Argus, a content performance analyst. \
Given a ranked leaderboard of content with engagement metrics, you produce \
structured optimization recommendations.

Your action vocabulary is closed. Use exactly one of:
- double_down: theme/channel is winning; produce more of this kind of content
- retire: content/theme is consistently underperforming; stop investing
- rewrite: specific piece has potential but is poorly executed; redo it
- retest: result is inconclusive; re-run with more samples or a different cohort
- amplify: already-good content is under-distributed; push harder on existing channels
- investigate: anomaly you cannot confidently explain; flag for human review

Be evidence-based. Every recommendation must cite specific metrics with content_ids.
Bias toward fewer, higher-confidence recommendations. Five strong recs beat fifteen weak ones.
Confidence below 0.5 means "investigate" — do not recommend a directional action."""
866
+
867
    @property
    def SYSTEM_PROMPT(self) -> str:
        """System prompt used for the recommendation-generation LLM call.

        Returns the per-instance ``self._system_prompt`` (presumably seeded
        from ``_DEFAULT_SYSTEM_PROMPT`` at init time — confirm in __init__).
        """
        return self._system_prompt
870
+
871
+ async def _generate_recommendations(
872
+ self,
873
+ scored: list[PerformanceMetric],
874
+ ) -> tuple[list[Recommendation], list[str]]:
875
+ """One Sonnet call. Returns (recommendations, trend_signals).
876
+
877
+ Bounded input: top 10 + bottom 5 per content type, capped at 50 lines.
878
+ Output: JSON with ``recommendations`` and ``trend_signals`` arrays.
879
+ """
880
+ by_type: dict[str, list[PerformanceMetric]] = {}
881
+ for m in scored:
882
+ by_type.setdefault(m.content_type, []).append(m)
883
+
884
+ sections: list[str] = []
885
+ total = 0
886
+ types_dropped: list[tuple[str, int]] = [] # (content_type, item_count)
887
+ for ctype, group in by_type.items():
888
+ if total >= 50:
889
+ # Whole content type dropped — record so the prompt notes it
890
+ types_dropped.append((ctype, len(group)))
891
+ continue
892
+ ranked = sorted(group, key=lambda m: m.primary_metric, reverse=True)
893
+ slice_ = ranked[:10] + (ranked[-5:] if len(ranked) > 10 else [])
894
+ metric_name = ranked[0].metric_name if ranked else "n/a"
895
+ section_lines = [
896
+ f"### {ctype.upper()} ({len(group)} items, primary metric: {metric_name})"
897
+ ]
898
+ shown = 0
899
+ for m in slice_:
900
+ if total >= 50:
901
+ break
902
+ pct = f"p{m.percentile:.0f}" if m.percentile is not None else "p?"
903
+ wow = f", wow {m.wow_delta:+.1f}%" if m.wow_delta is not None else ""
904
+ anom = " [ANOMALY]" if m.anomaly_flag else ""
905
+ section_lines.append(
906
+ f"- {m.content_id}: {m.primary_metric:g} {m.metric_name} "
907
+ f"({pct}{wow}){anom} — {m.title}"
908
+ )
909
+ total += 1
910
+ shown += 1
911
+ if shown < len(slice_):
912
+ # Partial section — note how many were truncated
913
+ omitted = len(slice_) - shown
914
+ section_lines.append(
915
+ f"- ... ({omitted} more {ctype} items omitted from this section)"
916
+ )
917
+ sections.append("\n".join(section_lines))
918
+
919
+ if types_dropped:
920
+ dropped_summary = ", ".join(f"{ctype} ({n} items)" for ctype, n in types_dropped)
921
+ sections.append(
922
+ f"### TRUNCATED\nEntire content types omitted from prompt: {dropped_summary}"
923
+ )
924
+
925
+ leaderboard = "\n\n".join(sections)
926
+ user_prompt = f"""Period leaderboard (top 10 + bottom 5 per content type):
927
+
928
+ {leaderboard}
929
+
930
+ Return a JSON object with two top-level keys:
931
+ - "recommendations": array of {{action, target, target_type, rationale, evidence, confidence, source_ids}}
932
+ - "trend_signals": array of short strings describing themes/channel patterns (3-7 items)
933
+
934
+ action ∈ {{double_down, retire, rewrite, retest, amplify, investigate}}
935
+ target_type ∈ {{content, theme, channel}}
936
+ confidence ∈ [0.0, 1.0]; below 0.5 use action="investigate".
937
+ source_ids: array of content_id strings from the leaderboard above that back this
938
+ recommendation (use the exact content_id values; min 1, max 5). For target_type="theme"
939
+ or "channel", list the exemplary content_ids that motivated the recommendation.
940
+
941
+ Do not include any commentary outside the JSON."""
942
+
943
+ raw = await self.llm_client.generate(
944
+ system_prompt=self.SYSTEM_PROMPT,
945
+ user_prompt=user_prompt,
946
+ temperature=0.2,
947
+ max_tokens=2048,
948
+ )
949
+ cleaned = strip_markdown_fences(raw).strip()
950
+ data = json.loads(cleaned)
951
+ recs = [
952
+ Recommendation(
953
+ action=r["action"],
954
+ target=r["target"],
955
+ target_type=r["target_type"],
956
+ rationale=r["rationale"],
957
+ evidence=list(r.get("evidence", [])),
958
+ confidence=float(r["confidence"]),
959
+ source_ids=list(r.get("source_ids", [])),
960
+ )
961
+ for r in data.get("recommendations", [])
962
+ ]
963
+ trend_signals = list(data.get("trend_signals", []))
964
+ return recs, trend_signals