devrel-origin 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. devrel_origin/__init__.py +15 -0
  2. devrel_origin/cli/__init__.py +92 -0
  3. devrel_origin/cli/_common.py +243 -0
  4. devrel_origin/cli/analytics.py +28 -0
  5. devrel_origin/cli/argus.py +497 -0
  6. devrel_origin/cli/auth.py +227 -0
  7. devrel_origin/cli/config.py +108 -0
  8. devrel_origin/cli/content.py +259 -0
  9. devrel_origin/cli/cost.py +108 -0
  10. devrel_origin/cli/cro.py +298 -0
  11. devrel_origin/cli/deliverables.py +65 -0
  12. devrel_origin/cli/docs.py +91 -0
  13. devrel_origin/cli/doctor.py +178 -0
  14. devrel_origin/cli/experiment.py +29 -0
  15. devrel_origin/cli/growth.py +97 -0
  16. devrel_origin/cli/init.py +472 -0
  17. devrel_origin/cli/intel.py +27 -0
  18. devrel_origin/cli/kb.py +96 -0
  19. devrel_origin/cli/listen.py +31 -0
  20. devrel_origin/cli/marketing.py +66 -0
  21. devrel_origin/cli/migrate.py +45 -0
  22. devrel_origin/cli/run.py +46 -0
  23. devrel_origin/cli/sales.py +57 -0
  24. devrel_origin/cli/schedule.py +62 -0
  25. devrel_origin/cli/synthesize.py +28 -0
  26. devrel_origin/cli/triage.py +29 -0
  27. devrel_origin/cli/video.py +35 -0
  28. devrel_origin/core/__init__.py +58 -0
  29. devrel_origin/core/agent_config.py +75 -0
  30. devrel_origin/core/argus.py +964 -0
  31. devrel_origin/core/atlas.py +1450 -0
  32. devrel_origin/core/base.py +372 -0
  33. devrel_origin/core/cyra.py +563 -0
  34. devrel_origin/core/dex.py +708 -0
  35. devrel_origin/core/echo.py +614 -0
  36. devrel_origin/core/growth/__init__.py +27 -0
  37. devrel_origin/core/growth/recommendations.py +219 -0
  38. devrel_origin/core/growth/target_kinds.py +51 -0
  39. devrel_origin/core/iris.py +513 -0
  40. devrel_origin/core/kai.py +1367 -0
  41. devrel_origin/core/llm.py +542 -0
  42. devrel_origin/core/llm_backends.py +274 -0
  43. devrel_origin/core/mox.py +514 -0
  44. devrel_origin/core/nova.py +349 -0
  45. devrel_origin/core/pax.py +1205 -0
  46. devrel_origin/core/rex.py +532 -0
  47. devrel_origin/core/sage.py +486 -0
  48. devrel_origin/core/sentinel.py +385 -0
  49. devrel_origin/core/types.py +98 -0
  50. devrel_origin/core/video/__init__.py +22 -0
  51. devrel_origin/core/video/assembler.py +131 -0
  52. devrel_origin/core/video/browser_recorder.py +118 -0
  53. devrel_origin/core/video/desktop_recorder.py +254 -0
  54. devrel_origin/core/video/overlay_renderer.py +143 -0
  55. devrel_origin/core/video/script_parser.py +147 -0
  56. devrel_origin/core/video/tts_engine.py +82 -0
  57. devrel_origin/core/vox.py +268 -0
  58. devrel_origin/core/watchdog.py +321 -0
  59. devrel_origin/project/__init__.py +1 -0
  60. devrel_origin/project/config.py +75 -0
  61. devrel_origin/project/cost_sink.py +61 -0
  62. devrel_origin/project/init.py +104 -0
  63. devrel_origin/project/paths.py +75 -0
  64. devrel_origin/project/state.py +241 -0
  65. devrel_origin/project/templates/__init__.py +4 -0
  66. devrel_origin/project/templates/config.toml +24 -0
  67. devrel_origin/project/templates/devrel.gitignore +10 -0
  68. devrel_origin/project/templates/slop-blocklist.md +45 -0
  69. devrel_origin/project/templates/style.md +24 -0
  70. devrel_origin/project/templates/voice.md +29 -0
  71. devrel_origin/quality/__init__.py +66 -0
  72. devrel_origin/quality/editorial.py +357 -0
  73. devrel_origin/quality/persona.py +84 -0
  74. devrel_origin/quality/readability.py +148 -0
  75. devrel_origin/quality/slop.py +167 -0
  76. devrel_origin/quality/style.py +110 -0
  77. devrel_origin/quality/voice.py +15 -0
  78. devrel_origin/tools/__init__.py +9 -0
  79. devrel_origin/tools/analytics.py +304 -0
  80. devrel_origin/tools/api_client.py +393 -0
  81. devrel_origin/tools/apollo_client.py +305 -0
  82. devrel_origin/tools/code_validator.py +428 -0
  83. devrel_origin/tools/github_tools.py +297 -0
  84. devrel_origin/tools/instantly_client.py +412 -0
  85. devrel_origin/tools/kb_harvester.py +340 -0
  86. devrel_origin/tools/mcp_server.py +578 -0
  87. devrel_origin/tools/notifications.py +245 -0
  88. devrel_origin/tools/run_report.py +193 -0
  89. devrel_origin/tools/scheduler.py +231 -0
  90. devrel_origin/tools/search_tools.py +321 -0
  91. devrel_origin/tools/self_improve.py +168 -0
  92. devrel_origin/tools/sheets.py +236 -0
  93. devrel_origin-0.2.14.dist-info/METADATA +354 -0
  94. devrel_origin-0.2.14.dist-info/RECORD +98 -0
  95. devrel_origin-0.2.14.dist-info/WHEEL +5 -0
  96. devrel_origin-0.2.14.dist-info/entry_points.txt +2 -0
  97. devrel_origin-0.2.14.dist-info/licenses/LICENSE +21 -0
  98. devrel_origin-0.2.14.dist-info/top_level.txt +1 -0
devrel_origin/quality/slop.py
@@ -0,0 +1,167 @@
+ """Anti-slop pipeline stage 5.
+
+ Three-step matching:
+ 1. Regex blocklist (deterministic, fast). Word-boundary, case-insensitive.
+ 2. LLM lint (Haiku). Catches context-sensitive slop the regex misses
+    (verbose intros, vague intensifiers in unusual phrasings).
+ 3. Force-rewrite (Sonnet). One targeted rewrite call with all hits listed.
+    If the rewrite still trips the blocklist on re-check, the orchestrator
+    aborts loud; see editorial.py.
+
+ Haiku's lint output occasionally hallucinates phrases that don't appear in
+ the source (verbatim from a real abort: 'replace this blockquote', 'replace
+ with' in a draft that contained neither). Hallucinated phrases would then go
+ into force_rewrite's flagged list and re-appear on the post-rewrite re-check,
+ falsely tripping the abort-loud condition. _verify_lint_hits filters out any
+ phrase that does not actually appear in the text (case-insensitive substring),
+ so only real slop survives into the rewrite + abort loop.
+ """
+
+ from __future__ import annotations
+
+ import logging
+ import re
+ from dataclasses import dataclass
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass(frozen=True)
+ class SlopHit:
+     phrase: str
+     start: int
+     end: int
+
+
+ def parse_blocklist(md: str) -> list[str]:
+     """Parse `slop-blocklist.md`. Returns lowercased phrases, one per
+     non-comment, non-blank line. Lines starting with `#` are comments."""
+     out: list[str] = []
+     for raw in md.splitlines():
+         line = raw.strip()
+         if not line or line.startswith("#"):
+             continue
+         out.append(line.lower())
+     return out
+
+
+ def find_slop(text: str, blocklist: list[str]) -> list[SlopHit]:
+     """Word-boundary, case-insensitive regex match. Returns one hit per
+     occurrence, in order."""
+     hits: list[SlopHit] = []
+     text_lower = text.lower()
+     for phrase in blocklist:
+         pattern = r"\b" + re.escape(phrase) + r"\b"
+         for m in re.finditer(pattern, text_lower):
+             hits.append(SlopHit(phrase=phrase, start=m.start(), end=m.end()))
+     hits.sort(key=lambda h: h.start)
+     return hits
+
+
+ _LINT_SYSTEM = (
+     "You are an editor screening AI-written content for tells the regex "
+     "blocklist would miss: verbose intros, vague intensifiers in unusual "
+     "phrasings, hedging that doesn't appear in the blocklist verbatim. "
+     "Return a flat list, one phrase per line, lowercase, no bullets, no "
+     "explanations. If nothing concerning, return an empty response."
+ )
+
+
+ def _normalize_lint_lines(raw: str) -> list[str]:
+     out: list[str] = []
+     for line in raw.splitlines():
+         s = line.strip()
+         if not s or s.startswith("#"):
+             continue
+         # Strip leading bullets / ordinals.
+         s = re.sub(r"^[\-\*•\d\.\)]+\s*", "", s)
+         if s:
+             out.append(s.lower())
+     return out
+
+
+ def _verify_lint_hits(text: str, candidates: list[str]) -> list[str]:
+     """Drop hallucinated lint hits (phrases Haiku flagged that don't appear).
+
+     Case-insensitive substring match. We deliberately don't require word
+     boundaries because Haiku sometimes returns slightly truncated phrases
+     (e.g. 'in essence' for the substring 'in essence,'); a substring match
+     accepts those while still rejecting fully-fabricated phrases. Logs the
+     filtered set at INFO so we can monitor Haiku's hallucination rate without
+     polluting user-facing output.
+     """
+     text_lower = text.lower()
+     real: list[str] = []
+     hallucinated: list[str] = []
+     for phrase in candidates:
+         if phrase and phrase in text_lower:
+             real.append(phrase)
+         else:
+             hallucinated.append(phrase)
+     if hallucinated:
+         logger.info(
+             "slop_lint_filtered_hallucinations",
+             extra={
+                 "filtered_count": len(hallucinated),
+                 "kept_count": len(real),
+                 "filtered": hallucinated[:10],  # cap log size
+             },
+         )
+     return real
+
+
+ async def llm_lint(text: str, voice: str, llm_client) -> list[str]:
+     """Haiku-powered second-pass slop detector.
+
+     Verifies each Haiku-flagged phrase actually appears in the source so
+     hallucinated flags don't propagate into force_rewrite or trigger the
+     orchestrator's post-rewrite abort-loud check.
+     """
+     user = (
+         "Voice contract for this product:\n\n" + (voice or "(none)") + "\n\n"
+         "Content to screen:\n\n" + text + "\n\n"
+         "List the phrases that read as AI-written, one per line. Empty if clean. "
+         "Only list phrases that appear verbatim in the content above, exactly as "
+         "they appear there; do not paraphrase, abbreviate, or invent phrases."
+     )
+     raw = await llm_client.generate(
+         system_prompt=_LINT_SYSTEM,
+         user_prompt=user,
+         model="haiku",
+     )
+     return _verify_lint_hits(text, _normalize_lint_lines(raw))
+
+
+ _REWRITE_SYSTEM = (
+     "You are a rewrite editor. The reader has flagged specific phrases as "
+     "AI-written. Rewrite the content so none of the flagged phrases (or "
+     "their close synonyms) appear, while preserving meaning, structure, "
+     "and the project's voice. Return only the rewritten content — no "
+     "preamble, no explanation."
+ )
+
+
+ async def force_rewrite(
+     text: str,
+     regex_hits: list[SlopHit],
+     llm_lint_hits: list[str],
+     voice: str,
+     llm_client,
+ ) -> str:
+     """Single Sonnet rewrite with the full flagged list. Caller is
+     responsible for re-running `find_slop` + `llm_lint` to verify the
+     rewrite cleared the issues."""
+     flagged = sorted({h.phrase for h in regex_hits} | set(llm_lint_hits))
+     flagged_listing = "\n".join(f"- {p}" for p in flagged)
+     user = (
+         "Voice contract:\n\n" + (voice or "(none)") + "\n\n"
+         "Flagged phrases (do not let any of these appear in the rewrite, "
+         "and avoid close synonyms):\n\n" + flagged_listing + "\n\n"
+         "Original content:\n\n" + text
+     )
+     rewritten = await llm_client.generate(
+         system_prompt=_REWRITE_SYSTEM,
+         user_prompt=user,
+         model="sonnet",
+     )
+     return rewritten.strip()
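
For orientation, here is a minimal sketch of how a caller might chain the three steps. The `scrub` helper and the blocklist location are hypothetical, not from the package; the real loop, including retries and the abort-loud check, lives in editorial.py, which this diff does not show.

```python
# Hypothetical wiring of the three steps (illustration only).
from pathlib import Path

from devrel_origin.quality.slop import find_slop, force_rewrite, llm_lint, parse_blocklist


async def scrub(text: str, voice: str, llm_client) -> str:
    md = Path(".devrel/slop-blocklist.md").read_text(encoding="utf-8")  # assumed location
    blocklist = parse_blocklist(md)
    regex_hits = find_slop(text, blocklist)
    lint_hits = await llm_lint(text, voice, llm_client)
    if not regex_hits and not lint_hits:
        return text  # clean on the first pass
    rewritten = await force_rewrite(text, regex_hits, lint_hits, voice, llm_client)
    if find_slop(rewritten, blocklist):  # re-check; the orchestrator aborts loud here
        raise RuntimeError("blocklist still trips after force_rewrite")
    return rewritten
```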
devrel_origin/quality/style.py
@@ -0,0 +1,110 @@
+ """Load style.md and parse the per-content-type targets table.
+
+ Content type names are normalized to snake_case for keying (e.g.,
+ "Blog post" -> "blog_post"). Targets parsing is best-effort: malformed
+ rows are skipped. If the file or table is missing, callers fall back to
+ DEFAULT_TARGETS.
+ """
+
+ from __future__ import annotations
+
+ import re
+ from dataclasses import dataclass
+
+ from devrel_origin.project.paths import ProjectPaths
+
+
+ @dataclass(frozen=True)
+ class ContentTypeTargets:
+     flesch_min: int
+     flesch_max: int
+     sentence_len_min: int
+     sentence_len_max: int
+     jargon_density: str
+
+
+ DEFAULT_TARGETS: dict[str, ContentTypeTargets] = {
+     "tutorial": ContentTypeTargets(50, 65, 12, 18, "medium"),
+     "blog_post": ContentTypeTargets(55, 70, 12, 20, "low-medium"),
+     "landing_page": ContentTypeTargets(60, 75, 10, 15, "low"),
+     "cold_email": ContentTypeTargets(65, 80, 10, 14, "low"),
+     "battle_card": ContentTypeTargets(45, 60, 12, 18, "medium-high"),
+ }
+
+
+ def load_style(paths: ProjectPaths) -> str:
+     """Return the full text of `.devrel/style.md`, or "" if missing."""
+     if not paths.style_file.is_file():
+         return ""
+     return paths.style_file.read_text(encoding="utf-8")
+
+
+ _RANGE_RE = re.compile(r"^\s*(\d+)\s*[–-]\s*(\d+)")
+
+
+ def _parse_range(s: str) -> tuple[int, int] | None:
+     m = _RANGE_RE.match(s)
+     if not m:
+         return None
+     return int(m.group(1)), int(m.group(2))
+
+
+ def _normalize_name(s: str) -> str:
+     return re.sub(r"[^a-z0-9]+", "_", s.strip().lower()).strip("_")
+
+
+ def parse_targets(md: str) -> dict[str, ContentTypeTargets]:
+     """Parse the per-content-type table in style.md. Looks for the first
+     pipe-table whose header row contains 'Flesch' (case-insensitive) and
+     'Jargon'. Returns a snake_case-keyed dict of ContentTypeTargets.
+     """
+     lines = md.splitlines()
+     out: dict[str, ContentTypeTargets] = {}
+     in_table = False
+     header_seen = False
+     for raw in lines:
+         line = raw.strip()
+         if not line.startswith("|"):
+             if in_table:
+                 break
+             continue
+         if not header_seen:
+             lower = line.lower()
+             if "jargon" in lower and ("flesch" in lower or "f-k" in lower or "sentence" in lower):
+                 header_seen = True
+                 in_table = True
+             continue
+         # Skip the markdown separator row (|---|---|...).
+         if set(line.replace("|", "").strip()) <= set("- "):
+             continue
+         cells = [c.strip() for c in line.strip().strip("|").split("|")]
+         if len(cells) < 4:
+             continue
+         name_cell, flesch_cell, sentence_cell, jargon_cell = cells[:4]
+         flesch = _parse_range(flesch_cell)
+         sentence = _parse_range(sentence_cell)
+         if flesch is None or sentence is None:
+             continue
+         name = _normalize_name(name_cell)
+         if not name:
+             continue
+         out[name] = ContentTypeTargets(
+             flesch_min=flesch[0],
+             flesch_max=flesch[1],
+             sentence_len_min=sentence[0],
+             sentence_len_max=sentence[1],
+             jargon_density=jargon_cell,
+         )
+     return out
+
+
+ def get_targets(content_type: str, md: str) -> ContentTypeTargets:
+     """Resolve targets for a content type: prefer parsed style.md table,
+     then fall back to DEFAULT_TARGETS. Raises KeyError if neither source
+     has the type."""
+     parsed = parse_targets(md)
+     if content_type in parsed:
+         return parsed[content_type]
+     if content_type in DEFAULT_TARGETS:
+         return DEFAULT_TARGETS[content_type]
+     raise KeyError(f"Unknown content_type: {content_type!r}")
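
To make the table contract concrete, a sketch of an input `parse_targets` accepts; the rows and values here are invented for illustration, not taken from the package's style.md template. The header passes the 'Flesch'/'Jargon' detection, the separator row is skipped, and "Blog post" normalizes to `blog_post`.

```python
from devrel_origin.quality.style import parse_targets

md = """\
| Content type | Flesch | Sentence length | Jargon density |
|---|---|---|---|
| Blog post | 55-70 | 12-20 | low-medium |
| Tutorial | 50-65 | 12-18 | medium |
"""

targets = parse_targets(md)
assert targets["blog_post"].flesch_max == 70        # range split on "-" or "–"
assert targets["tutorial"].sentence_len_min == 12
```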
devrel_origin/quality/voice.py
@@ -0,0 +1,15 @@
+ """Load voice.md as a single string for prompt injection."""
+
+ from __future__ import annotations
+
+ from devrel_origin.project.paths import ProjectPaths
+
+
+ def load_voice(paths: ProjectPaths) -> str:
+     """Return the full text of `.devrel/voice.md`, or "" if the file is
+     missing. The orchestrator injects this verbatim into editorial-stage
+     system prompts as the project's voice contract.
+     """
+     if not paths.voice_file.is_file():
+         return ""
+     return paths.voice_file.read_text(encoding="utf-8")
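
A short sketch of the injection path the docstring describes, assuming an async editorial step with `paths` and `llm_client` already in scope (the `editorial_lint` name is hypothetical):

```python
from devrel_origin.quality.slop import llm_lint
from devrel_origin.quality.voice import load_voice


async def editorial_lint(draft: str, paths, llm_client) -> list[str]:
    voice = load_voice(paths)  # "" when voice.md is absent; llm_lint substitutes "(none)"
    return await llm_lint(draft, voice, llm_client)
```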
devrel_origin/tools/__init__.py
@@ -0,0 +1,9 @@
+ """
+ Tools module — API clients, GitHub integration, search, notifications, and MCP server.
+ """
+
+ from devrel_origin.tools.api_client import PostHogClient
+ from devrel_origin.tools.github_tools import GitHubTools
+ from devrel_origin.tools.search_tools import SearchTools
+
+ __all__ = ["PostHogClient", "GitHubTools", "SearchTools"]
devrel_origin/tools/analytics.py
@@ -0,0 +1,304 @@
+ """Argus data collectors — one class per source.
+
+ Each collector exposes a single async method ``collect(period)`` returning
+ ``list[PerformanceMetric]``. Collectors do not raise — failures are logged
+ and an empty list is returned, so Argus can mark the source unhealthy in
+ ``PerformanceReport.sources_ok`` without aborting the whole report.
+ """
+
+ from __future__ import annotations
+
+ import asyncio
+ import logging
+ import sqlite3
+ from datetime import datetime
+ from pathlib import Path
+ from typing import TYPE_CHECKING
+ from urllib.parse import urlparse
+
+ from devrel_origin.core.argus import ContentType, PerformanceMetric
+
+ if TYPE_CHECKING:
+     from devrel_origin.tools.api_client import PostHogClient
+
+ logger = logging.getLogger(__name__)
+
+ Period = tuple[datetime, datetime]
+
+ _LANDING_PATHS: frozenset[str] = frozenset(
+     {"/", "/pricing", "/about", "/contact", "/features", "/docs"}
+ )
+
+
+ def _classify_url(url: str) -> ContentType:
+     """Heuristic: /blog/* → blog; root + configured marketing paths → landing."""
+     path = urlparse(url).path or "/"
+     if path in _LANDING_PATHS:
+         return "landing"
+     if path.startswith("/blog/"):
+         return "blog"
+     return "landing"
+
+
+ def _content_id_from_url(url: str) -> str:
+     """Stable id derived from URL path."""
+     path = urlparse(url).path or "/"
+     if path.startswith("/blog/"):
+         slug = path[len("/blog/") :].rstrip("/")
+         return f"blog/{slug}" if slug else "blog/index"
+     return path
+
+
+ class PostHogCollector:
+     """Pulls page-view + unique-visitor counts from PostHog grouped by URL."""
+
+     def __init__(self, client: "PostHogClient"):
+         self.client = client
+
+     async def collect(self, period: Period) -> list[PerformanceMetric]:
+         _start, end = period
+         try:
+             rows = await self.client.fetch_events_by_url(start=_start, end=end)
+         except Exception as exc:  # noqa: BLE001
+             logger.warning("PostHogCollector failed: %s", exc)
+             return []
+
+         metrics: list[PerformanceMetric] = []
+         for row in rows:
+             url = row.get("url", "")
+             if not url:
+                 continue
+             metrics.append(
+                 PerformanceMetric(
+                     content_id=_content_id_from_url(url),
+                     content_type=_classify_url(url),
+                     title=row.get("title") or url,
+                     url=url,
+                     published_at=end,
+                     primary_metric=float(row.get("page_views", 0) or 0),
+                     metric_name="page_views",
+                     secondary_metrics={
+                         "unique_visitors": float(row.get("unique_visitors", 0) or 0),
+                     },
+                 )
+             )
+         return metrics
+
+
+ class GitHubCollector:
+     """Emits one PerformanceMetric per repo with stars_delta as primary KPI.
+
+     Wrapped client is expected to expose ``repo_full_name: str`` and
+     ``async get_repo_stats() -> dict`` with at minimum
+     ``stars, forks, open_issues, stars_delta_7d, issues_closed_7d``.
+     """
+
+     def __init__(self, client):
+         self.client = client
+
+     async def collect(self, period: Period) -> list[PerformanceMetric]:
+         _start, end = period
+         try:
+             stats = await self.client.get_repo_stats()
+         except Exception as exc:  # noqa: BLE001
+             logger.warning("GitHubCollector failed: %s", exc)
+             return []
+
+         repo = getattr(self.client, "repo_full_name", "unknown/unknown")
+         return [
+             PerformanceMetric(
+                 content_id=f"repo/{repo}",
+                 content_type="repo",
+                 title=repo,
+                 url=f"https://github.com/{repo}",
+                 published_at=end,
+                 primary_metric=float(stats.get("stars_delta_7d", 0) or 0),
+                 metric_name="stars_delta",
+                 secondary_metrics={
+                     "stars_total": float(stats.get("stars", 0) or 0),
+                     "forks": float(stats.get("forks", 0) or 0),
+                     "open_issues": float(stats.get("open_issues", 0) or 0),
+                     "issues_closed": float(stats.get("issues_closed_7d", 0) or 0),
+                 },
+             )
+         ]
+
+
+ def _row_in_period(row: dict, start: datetime, end: datetime) -> bool:
+     """True if row's created_at/updated_at falls in [start, end].
+
+     Falls back to True (include the row) when neither timestamp is present —
+     older Instantly campaigns may not include either field. Without this
+     filter, --since 7d and --since 90d return identical metrics.
+     """
+     raw = row.get("created_at") or row.get("updated_at")
+     if not raw:
+         return True
+     try:
+         when = datetime.fromisoformat(str(raw).replace("Z", "+00:00"))
+     except ValueError:
+         return True
+     return start <= when <= end
+
+
+ class InstantlyCollector:
+     """One PerformanceMetric per email campaign; reply_rate is primary KPI.
+
+     Wrapped client is expected to expose
+     ``async list_campaigns_with_analytics() -> list[dict]`` with at minimum
+     ``id, name, sent, opens, clicks, replies, open_rate, reply_rate``,
+     and optionally ``created_at`` / ``updated_at`` for period filtering.
+     """
+
+     def __init__(self, client):
+         self.client = client
+
+     async def collect(self, period: Period) -> list[PerformanceMetric]:
+         start, end = period
+         try:
+             rows = await self.client.list_campaigns_with_analytics()
+         except Exception as exc:  # noqa: BLE001
+             logger.warning("InstantlyCollector failed: %s", exc)
+             return []
+
+         metrics: list[PerformanceMetric] = []
+         for row in rows:
+             cid = row.get("id") or ""
+             if not cid:
+                 continue
+             if not _row_in_period(row, start, end):
+                 continue
+             metrics.append(
+                 PerformanceMetric(
+                     content_id=f"email/{cid}",
+                     content_type="email",
+                     title=row.get("name", cid),
+                     url=None,
+                     published_at=end,
+                     primary_metric=float(row.get("reply_rate", 0.0) or 0.0),
+                     metric_name="reply_rate",
+                     secondary_metrics={
+                         "sent": float(row.get("sent", 0) or 0),
+                         "opens": float(row.get("opens", 0) or 0),
+                         "clicks": float(row.get("clicks", 0) or 0),
+                         "replies": float(row.get("replies", 0) or 0),
+                         "open_rate": float(row.get("open_rate", 0.0) or 0.0),
+                     },
+                 )
+             )
+         return metrics
+
+
+ class SocialCollector:
+     """Reads Echo's ``social_mentions`` table, filters to ``is_own_post=1``,
+     emits one metric per post with engagement_score as the primary KPI.
+
+     Returns an empty list (and logs) if the table is missing or the period
+     yields no rows. Does not raise.
+     """
+
+     # Pinned schema contract: these columns MUST exist on Echo's
+     # social_mentions table. If a future Echo migration renames any of
+     # these, _verify_schema logs a clear warning and the collector returns
+     # [] rather than silently producing partial data.
+     _REQUIRED_COLUMNS: frozenset[str] = frozenset(
+         {
+             "platform",
+             "post_id",
+             "title",
+             "url",
+             "posted_at",
+             "upvotes",
+             "comments",
+             "engagement_score",
+             "is_own_post",
+         }
+     )
+
+     def __init__(self, state_db_path: Path):
+         self.state_db_path = state_db_path
+         self._schema_verified = False
+
+     def _verify_schema(self, conn: sqlite3.Connection) -> bool:
+         """Confirm social_mentions has all required columns.
+
+         Cached per instance via ``self._schema_verified`` so we only PRAGMA
+         once. Logs a single clear warning if columns are missing/renamed.
+         """
+         if self._schema_verified:
+             return True
+         try:
+             cols = {row[1] for row in conn.execute("PRAGMA table_info(social_mentions)")}
+         except sqlite3.OperationalError:
+             return False  # table doesn't exist yet
+         missing = self._REQUIRED_COLUMNS - cols
+         if missing:
+             logger.warning(
+                 "SocialCollector: Echo's social_mentions table is missing "
+                 "required columns: %s. Argus will return no social metrics "
+                 "until the schema is updated.",
+                 sorted(missing),
+             )
+             return False
+         self._schema_verified = True
+         return True
+
+     def _read_rows(self, start_iso: str, end_iso: str):
+         """Synchronous SQLite read; called via asyncio.to_thread to avoid
+         stalling the event loop. Returns None on any failure (the async
+         wrapper translates that to an empty result + logs)."""
+         try:
+             with sqlite3.connect(self.state_db_path) as conn:
+                 conn.row_factory = sqlite3.Row
+                 if not self._verify_schema(conn):
+                     return None
+                 return conn.execute(
+                     "SELECT platform, post_id, title, url, posted_at, "
+                     "upvotes, comments, engagement_score "
+                     "FROM social_mentions "
+                     "WHERE is_own_post = 1 AND posted_at >= ? AND posted_at <= ?",
+                     (start_iso, end_iso),
+                 ).fetchall()
+         except sqlite3.OperationalError as exc:
+             logger.info("SocialCollector: %s", exc)
+             return None
+         except Exception as exc:  # noqa: BLE001
+             logger.warning("SocialCollector failed: %s", exc)
+             return None
+
+     async def collect(self, period: Period) -> list[PerformanceMetric]:
+         start, end = period
+         if not self.state_db_path.is_file():
+             logger.info("SocialCollector: state.db not present, skipping")
+             return []
+
+         rows = await asyncio.to_thread(
+             self._read_rows,
+             start.isoformat(),
+             end.isoformat(),
+         )
+         if rows is None:
+             return []
+
+         metrics: list[PerformanceMetric] = []
+         for row in rows:
+             try:
+                 posted_at = datetime.fromisoformat(row["posted_at"].replace("Z", "+00:00"))
+             except ValueError:
+                 posted_at = end
+             metrics.append(
+                 PerformanceMetric(
+                     content_id=f"social/{row['platform']}/{row['post_id']}",
+                     content_type="social",
+                     title=row["title"] or row["post_id"],
+                     url=row["url"],
+                     published_at=posted_at,
+                     primary_metric=float(row["engagement_score"] or 0.0),
+                     metric_name="engagement_score",
+                     secondary_metrics={
+                         "upvotes": float(row["upvotes"] or 0),
+                         "comments": float(row["comments"] or 0),
+                     },
+                 )
+             )
+         return metrics
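
The module docstring's contract (failures log and return an empty list) implies the fan-out over sources can stay exception-free. A hedged sketch of how a caller might gather all collectors and derive a `sources_ok` map; `run_collectors` is an assumption for illustration, and the real report assembly lives in core/argus.py, not in this file.

```python
# Hypothetical fan-out over collectors (not the package's report builder).
import asyncio

from devrel_origin.tools.analytics import Period


async def run_collectors(collectors: dict, period: Period):
    # collectors: name -> collector instance; per the module contract,
    # collect() never raises and surfaces failures as empty lists.
    results = await asyncio.gather(*(c.collect(period) for c in collectors.values()))
    # An empty list marks the source unhealthy. Note this cannot distinguish
    # "source failed" from "source genuinely had no rows this period".
    sources_ok = {name: bool(rows) for name, rows in zip(collectors, results)}
    metrics = [m for rows in results for m in rows]
    return metrics, sources_ok
```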