devrel-origin 0.2.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devrel_origin/__init__.py +15 -0
- devrel_origin/cli/__init__.py +92 -0
- devrel_origin/cli/_common.py +243 -0
- devrel_origin/cli/analytics.py +28 -0
- devrel_origin/cli/argus.py +497 -0
- devrel_origin/cli/auth.py +227 -0
- devrel_origin/cli/config.py +108 -0
- devrel_origin/cli/content.py +259 -0
- devrel_origin/cli/cost.py +108 -0
- devrel_origin/cli/cro.py +298 -0
- devrel_origin/cli/deliverables.py +65 -0
- devrel_origin/cli/docs.py +91 -0
- devrel_origin/cli/doctor.py +178 -0
- devrel_origin/cli/experiment.py +29 -0
- devrel_origin/cli/growth.py +97 -0
- devrel_origin/cli/init.py +472 -0
- devrel_origin/cli/intel.py +27 -0
- devrel_origin/cli/kb.py +96 -0
- devrel_origin/cli/listen.py +31 -0
- devrel_origin/cli/marketing.py +66 -0
- devrel_origin/cli/migrate.py +45 -0
- devrel_origin/cli/run.py +46 -0
- devrel_origin/cli/sales.py +57 -0
- devrel_origin/cli/schedule.py +62 -0
- devrel_origin/cli/synthesize.py +28 -0
- devrel_origin/cli/triage.py +29 -0
- devrel_origin/cli/video.py +35 -0
- devrel_origin/core/__init__.py +58 -0
- devrel_origin/core/agent_config.py +75 -0
- devrel_origin/core/argus.py +964 -0
- devrel_origin/core/atlas.py +1450 -0
- devrel_origin/core/base.py +372 -0
- devrel_origin/core/cyra.py +563 -0
- devrel_origin/core/dex.py +708 -0
- devrel_origin/core/echo.py +614 -0
- devrel_origin/core/growth/__init__.py +27 -0
- devrel_origin/core/growth/recommendations.py +219 -0
- devrel_origin/core/growth/target_kinds.py +51 -0
- devrel_origin/core/iris.py +513 -0
- devrel_origin/core/kai.py +1367 -0
- devrel_origin/core/llm.py +542 -0
- devrel_origin/core/llm_backends.py +274 -0
- devrel_origin/core/mox.py +514 -0
- devrel_origin/core/nova.py +349 -0
- devrel_origin/core/pax.py +1205 -0
- devrel_origin/core/rex.py +532 -0
- devrel_origin/core/sage.py +486 -0
- devrel_origin/core/sentinel.py +385 -0
- devrel_origin/core/types.py +98 -0
- devrel_origin/core/video/__init__.py +22 -0
- devrel_origin/core/video/assembler.py +131 -0
- devrel_origin/core/video/browser_recorder.py +118 -0
- devrel_origin/core/video/desktop_recorder.py +254 -0
- devrel_origin/core/video/overlay_renderer.py +143 -0
- devrel_origin/core/video/script_parser.py +147 -0
- devrel_origin/core/video/tts_engine.py +82 -0
- devrel_origin/core/vox.py +268 -0
- devrel_origin/core/watchdog.py +321 -0
- devrel_origin/project/__init__.py +1 -0
- devrel_origin/project/config.py +75 -0
- devrel_origin/project/cost_sink.py +61 -0
- devrel_origin/project/init.py +104 -0
- devrel_origin/project/paths.py +75 -0
- devrel_origin/project/state.py +241 -0
- devrel_origin/project/templates/__init__.py +4 -0
- devrel_origin/project/templates/config.toml +24 -0
- devrel_origin/project/templates/devrel.gitignore +10 -0
- devrel_origin/project/templates/slop-blocklist.md +45 -0
- devrel_origin/project/templates/style.md +24 -0
- devrel_origin/project/templates/voice.md +29 -0
- devrel_origin/quality/__init__.py +66 -0
- devrel_origin/quality/editorial.py +357 -0
- devrel_origin/quality/persona.py +84 -0
- devrel_origin/quality/readability.py +148 -0
- devrel_origin/quality/slop.py +167 -0
- devrel_origin/quality/style.py +110 -0
- devrel_origin/quality/voice.py +15 -0
- devrel_origin/tools/__init__.py +9 -0
- devrel_origin/tools/analytics.py +304 -0
- devrel_origin/tools/api_client.py +393 -0
- devrel_origin/tools/apollo_client.py +305 -0
- devrel_origin/tools/code_validator.py +428 -0
- devrel_origin/tools/github_tools.py +297 -0
- devrel_origin/tools/instantly_client.py +412 -0
- devrel_origin/tools/kb_harvester.py +340 -0
- devrel_origin/tools/mcp_server.py +578 -0
- devrel_origin/tools/notifications.py +245 -0
- devrel_origin/tools/run_report.py +193 -0
- devrel_origin/tools/scheduler.py +231 -0
- devrel_origin/tools/search_tools.py +321 -0
- devrel_origin/tools/self_improve.py +168 -0
- devrel_origin/tools/sheets.py +236 -0
- devrel_origin-0.2.14.dist-info/METADATA +354 -0
- devrel_origin-0.2.14.dist-info/RECORD +98 -0
- devrel_origin-0.2.14.dist-info/WHEEL +5 -0
- devrel_origin-0.2.14.dist-info/entry_points.txt +2 -0
- devrel_origin-0.2.14.dist-info/licenses/LICENSE +21 -0
- devrel_origin-0.2.14.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""Anti-slop pipeline stage 5.
|
|
2
|
+
|
|
3
|
+
Three-step matching:
|
|
4
|
+
1. Regex blocklist (deterministic, fast). Word-boundary, case-insensitive.
|
|
5
|
+
2. LLM lint (Haiku). Catches context-sensitive slop the regex misses
|
|
6
|
+
(verbose intros, vague intensifiers in unusual phrasings).
|
|
7
|
+
3. Force-rewrite (Sonnet). One targeted rewrite call with all hits listed.
|
|
8
|
+
If the rewrite still trips the blocklist on re-check, the orchestrator
|
|
9
|
+
aborts loud, see editorial.py.
|
|
10
|
+
|
|
11
|
+
Haiku's lint output occasionally hallucinates phrases that don't appear in
|
|
12
|
+
the source (verbatim from a real abort: 'replace this blockquote', 'replace
|
|
13
|
+
with' in a draft that contained neither). Hallucinated phrases would then go
|
|
14
|
+
into force_rewrite's flagged list and re-appear on the post-rewrite re-check,
|
|
15
|
+
falsely tripping the abort-loud condition. _verify_lint_hits filters out any
|
|
16
|
+
phrase that does not actually appear in the text (case-insensitive substring),
|
|
17
|
+
so only real slop survives into the rewrite + abort loop.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import logging
|
|
23
|
+
import re
|
|
24
|
+
from dataclasses import dataclass
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass(frozen=True)
class SlopHit:
    """One blocklist match found in the screened text.

    Attributes:
        phrase: the lowercased blocklist phrase that matched.
        start: match start offset into the text.
        end: match end offset (exclusive) into the text.
    """

    phrase: str
    start: int
    end: int


def parse_blocklist(md: str) -> list[str]:
    """Parse `slop-blocklist.md`. Returns lowercased phrases, one per
    non-comment, non-blank line. Lines starting with `#` are comments."""
    out: list[str] = []
    for raw in md.splitlines():
        line = raw.strip()
        if not line or line.startswith("#"):
            continue
        out.append(line.lower())
    return out


def find_slop(text: str, blocklist: list[str]) -> list[SlopHit]:
    """Word-boundary, case-insensitive regex match. Returns one hit per
    occurrence, in order.

    A ``\\b`` anchor is applied only on phrase edges that are word
    characters. An unconditional trailing ``\\b`` after a phrase ending in
    punctuation (e.g. ``"in essence,"``) can never match — comma followed
    by a space is not a word boundary — so such phrases would previously
    be silently skipped.
    """
    hits: list[SlopHit] = []
    text_lower = text.lower()
    for phrase in blocklist:
        if not phrase:
            continue
        prefix = r"\b" if re.match(r"\w", phrase[0]) else ""
        suffix = r"\b" if re.match(r"\w", phrase[-1]) else ""
        pattern = prefix + re.escape(phrase) + suffix
        for m in re.finditer(pattern, text_lower):
            hits.append(SlopHit(phrase=phrase, start=m.start(), end=m.end()))
    hits.sort(key=lambda h: h.start)
    return hits
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# System prompt for the Haiku lint pass (pipeline step 2). The
# empty-response contract lets callers treat "" as "nothing flagged".
_LINT_SYSTEM = (
    "You are an editor screening AI-written content for tells the regex "
    "blocklist would miss: verbose intros, vague intensifiers in unusual "
    "phrasings, hedging that doesn't appear in the blocklist verbatim. "
    "Return a flat list, one phrase per line, lowercase, no bullets, no "
    "explanations. If nothing concerning, return an empty response."
)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _normalize_lint_lines(raw: str) -> list[str]:
|
|
71
|
+
out: list[str] = []
|
|
72
|
+
for line in raw.splitlines():
|
|
73
|
+
s = line.strip()
|
|
74
|
+
if not s or s.startswith("#"):
|
|
75
|
+
continue
|
|
76
|
+
# Strip leading bullets / ordinals.
|
|
77
|
+
s = re.sub(r"^[\-\*•\d\.\)]+\s*", "", s)
|
|
78
|
+
if s:
|
|
79
|
+
out.append(s.lower())
|
|
80
|
+
return out
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _verify_lint_hits(text: str, candidates: list[str]) -> list[str]:
|
|
84
|
+
"""Drop hallucinated lint hits (phrases Haiku flagged that don't appear).
|
|
85
|
+
|
|
86
|
+
Case-insensitive substring match. We deliberately don't require word
|
|
87
|
+
boundaries because Haiku sometimes returns slightly truncated phrases
|
|
88
|
+
(e.g. 'in essence' for the substring 'in essence,'); a substring match
|
|
89
|
+
accepts those while still rejecting fully-fabricated phrases. Logs the
|
|
90
|
+
filtered set at INFO so we can monitor Haiku's hallucination rate without
|
|
91
|
+
polluting user-facing output.
|
|
92
|
+
"""
|
|
93
|
+
text_lower = text.lower()
|
|
94
|
+
real: list[str] = []
|
|
95
|
+
hallucinated: list[str] = []
|
|
96
|
+
for phrase in candidates:
|
|
97
|
+
if phrase and phrase in text_lower:
|
|
98
|
+
real.append(phrase)
|
|
99
|
+
else:
|
|
100
|
+
hallucinated.append(phrase)
|
|
101
|
+
if hallucinated:
|
|
102
|
+
logger.info(
|
|
103
|
+
"slop_lint_filtered_hallucinations",
|
|
104
|
+
extra={
|
|
105
|
+
"filtered_count": len(hallucinated),
|
|
106
|
+
"kept_count": len(real),
|
|
107
|
+
"filtered": hallucinated[:10], # cap log size
|
|
108
|
+
},
|
|
109
|
+
)
|
|
110
|
+
return real
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
async def llm_lint(text: str, voice: str, llm_client) -> list[str]:
    """Haiku-powered second-pass slop detector.

    Every phrase Haiku flags is verified against the source text, so
    hallucinated flags never propagate into force_rewrite or trip the
    orchestrator's post-rewrite abort-loud check.
    """
    prompt = (
        "Voice contract for this product:\n\n" + (voice or "(none)") + "\n\n"
        "Content to screen:\n\n" + text + "\n\n"
        "List the phrases that read as AI-written, one per line. Empty if clean. "
        "Only list phrases that appear verbatim in the content above, exactly as "
        "they appear there; do not paraphrase, abbreviate, or invent phrases."
    )
    response = await llm_client.generate(
        system_prompt=_LINT_SYSTEM,
        user_prompt=prompt,
        model="haiku",
    )
    candidates = _normalize_lint_lines(response)
    return _verify_lint_hits(text, candidates)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
# System prompt for the Sonnet force-rewrite pass (pipeline step 3). The
# "no preamble" instruction matters: the rewrite is re-checked verbatim,
# so chat-style framing would pollute the content.
_REWRITE_SYSTEM = (
    "You are a rewrite editor. The reader has flagged specific phrases as "
    "AI-written. Rewrite the content so none of the flagged phrases (or "
    "their close synonyms) appear, while preserving meaning, structure, "
    "and the project's voice. Return only the rewritten content — no "
    "preamble, no explanation."
)


async def force_rewrite(
    text: str,
    regex_hits: list[SlopHit],
    llm_lint_hits: list[str],
    voice: str,
    llm_client,
) -> str:
    """Run one Sonnet rewrite pass with every flagged phrase listed.

    The caller is responsible for re-running `find_slop` + `llm_lint`
    afterwards to verify the rewrite actually cleared the issues.
    """
    unique_phrases = set(llm_lint_hits)
    unique_phrases.update(h.phrase for h in regex_hits)
    bullet_list = "\n".join(f"- {p}" for p in sorted(unique_phrases))
    prompt = (
        "Voice contract:\n\n" + (voice or "(none)") + "\n\n"
        "Flagged phrases (do not let any of these appear in the rewrite, "
        "and avoid close synonyms):\n\n" + bullet_list + "\n\n"
        "Original content:\n\n" + text
    )
    result = await llm_client.generate(
        system_prompt=_REWRITE_SYSTEM,
        user_prompt=prompt,
        model="sonnet",
    )
    return result.strip()
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""Load style.md and parse the per-content-type targets table.
|
|
2
|
+
|
|
3
|
+
Content type names are normalized to snake_case for keying (e.g.,
|
|
4
|
+
"Blog post" -> "blog_post"). Targets parsing is best-effort: malformed
|
|
5
|
+
rows are skipped. If the file or table is missing, callers fall back to
|
|
6
|
+
DEFAULT_TARGETS.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import re
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
|
|
14
|
+
from devrel_origin.project.paths import ProjectPaths
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass(frozen=True)
class ContentTypeTargets:
    """Readability targets for one content type, parsed from style.md.

    ``jargon_density`` is a free-form label (e.g. "low", "medium-high")
    taken verbatim from the table cell; the numeric fields come from
    "low–high" range cells.
    """

    flesch_min: int  # lower bound of the Flesch reading-ease target range
    flesch_max: int  # upper bound of the Flesch reading-ease target range
    sentence_len_min: int  # lower bound of the sentence-length target range
    sentence_len_max: int  # upper bound of the sentence-length target range
    jargon_density: str  # free-form density label, copied verbatim from the table
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Fallback targets used when style.md is missing or its table lacks the
# requested content type (see get_targets). Keys are snake_case-normalized
# content-type names, matching parse_targets' keying.
DEFAULT_TARGETS: dict[str, ContentTypeTargets] = {
    "tutorial": ContentTypeTargets(50, 65, 12, 18, "medium"),
    "blog_post": ContentTypeTargets(55, 70, 12, 20, "low-medium"),
    "landing_page": ContentTypeTargets(60, 75, 10, 15, "low"),
    "cold_email": ContentTypeTargets(65, 80, 10, 14, "low"),
    "battle_card": ContentTypeTargets(45, 60, 12, 18, "medium-high"),
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def load_style(paths: ProjectPaths) -> str:
    """Return the full text of `.devrel/style.md`, or "" if missing."""
    style_path = paths.style_file
    if style_path.is_file():
        return style_path.read_text(encoding="utf-8")
    return ""
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
_RANGE_RE = re.compile(r"^\s*(\d+)\s*[–-]\s*(\d+)")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _parse_range(s: str) -> tuple[int, int] | None:
|
|
46
|
+
m = _RANGE_RE.match(s)
|
|
47
|
+
if not m:
|
|
48
|
+
return None
|
|
49
|
+
return int(m.group(1)), int(m.group(2))
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _normalize_name(s: str) -> str:
|
|
53
|
+
return re.sub(r"[^a-z0-9]+", "_", s.strip().lower()).strip("_")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def parse_targets(md: str) -> dict[str, ContentTypeTargets]:
    """Parse the per-content-type table in style.md. Looks for the first
    pipe-table whose header row contains 'Flesch' (case-insensitive) and
    'Jargon'. Returns a snake_case-keyed dict of ContentTypeTargets.

    Parsing is best-effort: rows with malformed range cells, fewer than
    four cells, or an empty normalized name are skipped rather than
    raising. Only the first matching table is read — the scan stops at the
    first non-pipe line after that table.
    """
    lines = md.splitlines()
    out: dict[str, ContentTypeTargets] = {}
    in_table = False
    header_seen = False
    for raw in lines:
        line = raw.strip()
        if not line.startswith("|"):
            # A non-pipe line after the table ends the scan; before the
            # table it's just surrounding prose to skip.
            if in_table:
                break
            continue
        if not header_seen:
            # Still hunting for the targets table's header row; pipe
            # tables with other headers are ignored.
            lower = line.lower()
            if "jargon" in lower and ("flesch" in lower or "f-k" in lower or "sentence" in lower):
                header_seen = True
                in_table = True
            continue
        # Skip the markdown separator row (|---|---|...).
        if set(line.replace("|", "").strip()) <= set("- "):
            continue
        cells = [c.strip() for c in line.strip().strip("|").split("|")]
        if len(cells) < 4:
            continue
        name_cell, flesch_cell, sentence_cell, jargon_cell = cells[:4]
        flesch = _parse_range(flesch_cell)
        sentence = _parse_range(sentence_cell)
        if flesch is None or sentence is None:
            # Malformed range cell -> best-effort skip of the whole row.
            continue
        name = _normalize_name(name_cell)
        if not name:
            continue
        out[name] = ContentTypeTargets(
            flesch_min=flesch[0],
            flesch_max=flesch[1],
            sentence_len_min=sentence[0],
            sentence_len_max=sentence[1],
            jargon_density=jargon_cell,
        )
    return out
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def get_targets(content_type: str, md: str) -> ContentTypeTargets:
    """Resolve targets for a content type: prefer the parsed style.md
    table, then fall back to DEFAULT_TARGETS. Raises KeyError if neither
    source has the type."""
    from_style = parse_targets(md)
    try:
        return from_style[content_type]
    except KeyError:
        pass
    try:
        return DEFAULT_TARGETS[content_type]
    except KeyError:
        raise KeyError(f"Unknown content_type: {content_type!r}") from None
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Load voice.md as a single string for prompt injection."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from devrel_origin.project.paths import ProjectPaths
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def load_voice(paths: ProjectPaths) -> str:
    """Return the full text of `.devrel/voice.md`, or "" if the file is
    missing. The orchestrator injects this verbatim into editorial-stage
    system prompts as the project's voice contract.
    """
    voice_path = paths.voice_file
    if not voice_path.is_file():
        return ""
    return voice_path.read_text(encoding="utf-8")
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tools module — API clients, GitHub integration, search, notifications, and MCP server.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from devrel_origin.tools.api_client import PostHogClient
|
|
6
|
+
from devrel_origin.tools.github_tools import GitHubTools
|
|
7
|
+
from devrel_origin.tools.search_tools import SearchTools
|
|
8
|
+
|
|
9
|
+
# Public surface of devrel_origin.tools; keep in sync with the imports above.
__all__ = ["PostHogClient", "GitHubTools", "SearchTools"]
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
"""Argus data collectors — one class per source.
|
|
2
|
+
|
|
3
|
+
Each collector exposes a single async method ``collect(period)`` returning
|
|
4
|
+
``list[PerformanceMetric]``. Collectors do not raise — failures are logged
|
|
5
|
+
and an empty list is returned, so Argus can mark the source unhealthy in
|
|
6
|
+
``PerformanceReport.sources_ok`` without aborting the whole report.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
import logging
|
|
13
|
+
import sqlite3
|
|
14
|
+
from datetime import datetime
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import TYPE_CHECKING
|
|
17
|
+
from urllib.parse import urlparse
|
|
18
|
+
|
|
19
|
+
from devrel_origin.core.argus import ContentType, PerformanceMetric
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from devrel_origin.tools.api_client import PostHogClient
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
# (start, end) datetime window handed to every collector's collect().
Period = tuple[datetime, datetime]
|
|
27
|
+
|
|
28
|
+
_LANDING_PATHS: frozenset[str] = frozenset(
|
|
29
|
+
{"/", "/pricing", "/about", "/contact", "/features", "/docs"}
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _classify_url(url: str) -> ContentType:
|
|
34
|
+
"""Heuristic: /blog/* → blog; root + configured marketing paths → landing."""
|
|
35
|
+
path = urlparse(url).path or "/"
|
|
36
|
+
if path in _LANDING_PATHS:
|
|
37
|
+
return "landing"
|
|
38
|
+
if path.startswith("/blog/"):
|
|
39
|
+
return "blog"
|
|
40
|
+
return "landing"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _content_id_from_url(url: str) -> str:
|
|
44
|
+
"""Stable id derived from URL path."""
|
|
45
|
+
path = urlparse(url).path or "/"
|
|
46
|
+
if path.startswith("/blog/"):
|
|
47
|
+
slug = path[len("/blog/") :].rstrip("/")
|
|
48
|
+
return f"blog/{slug}" if slug else "blog/index"
|
|
49
|
+
return path
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class PostHogCollector:
    """Pulls page-view + unique-visitor counts from PostHog grouped by URL."""

    def __init__(self, client: "PostHogClient"):
        self.client = client

    def _metric_for_row(self, row: dict, end: datetime) -> PerformanceMetric:
        # Shape one PostHog aggregate row into the common metric form.
        url = row.get("url", "")
        return PerformanceMetric(
            content_id=_content_id_from_url(url),
            content_type=_classify_url(url),
            title=row.get("title") or url,
            url=url,
            published_at=end,
            primary_metric=float(row.get("page_views", 0) or 0),
            metric_name="page_views",
            secondary_metrics={
                "unique_visitors": float(row.get("unique_visitors", 0) or 0),
            },
        )

    async def collect(self, period: Period) -> list[PerformanceMetric]:
        window_start, window_end = period
        try:
            rows = await self.client.fetch_events_by_url(start=window_start, end=window_end)
        except Exception as exc:  # noqa: BLE001
            logger.warning("PostHogCollector failed: %s", exc)
            return []
        # Rows without a URL can't be keyed to content; drop them.
        return [
            self._metric_for_row(row, window_end)
            for row in rows
            if row.get("url", "")
        ]
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class GitHubCollector:
    """Emits one PerformanceMetric per repo with stars_delta as primary KPI.

    Wrapped client is expected to expose ``repo_full_name: str`` and
    ``async get_repo_stats() -> dict`` with at minimum
    ``stars, forks, open_issues, stars_delta_7d, issues_closed_7d``.
    """

    def __init__(self, client):
        self.client = client

    async def collect(self, period: Period) -> list[PerformanceMetric]:
        _start, period_end = period
        try:
            stats = await self.client.get_repo_stats()
        except Exception as exc:  # noqa: BLE001
            logger.warning("GitHubCollector failed: %s", exc)
            return []

        def as_float(key: str) -> float:
            # Missing/None counters count as 0 so partial stats still report.
            return float(stats.get(key, 0) or 0)

        repo = getattr(self.client, "repo_full_name", "unknown/unknown")
        metric = PerformanceMetric(
            content_id=f"repo/{repo}",
            content_type="repo",
            title=repo,
            url=f"https://github.com/{repo}",
            published_at=period_end,
            primary_metric=as_float("stars_delta_7d"),
            metric_name="stars_delta",
            secondary_metrics={
                "stars_total": as_float("stars"),
                "forks": as_float("forks"),
                "open_issues": as_float("open_issues"),
                "issues_closed": as_float("issues_closed_7d"),
            },
        )
        return [metric]
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _row_in_period(row: dict, start: datetime, end: datetime) -> bool:
|
|
128
|
+
"""True if row's created_at/updated_at falls in [start, end].
|
|
129
|
+
|
|
130
|
+
Falls back to True (include the row) when neither timestamp is present —
|
|
131
|
+
older Instantly campaigns may not include either field. Without this
|
|
132
|
+
filter, --since 7d and --since 90d return identical metrics.
|
|
133
|
+
"""
|
|
134
|
+
raw = row.get("created_at") or row.get("updated_at")
|
|
135
|
+
if not raw:
|
|
136
|
+
return True
|
|
137
|
+
try:
|
|
138
|
+
when = datetime.fromisoformat(str(raw).replace("Z", "+00:00"))
|
|
139
|
+
except ValueError:
|
|
140
|
+
return True
|
|
141
|
+
return start <= when <= end
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class InstantlyCollector:
    """One PerformanceMetric per email campaign; reply_rate is primary KPI.

    Wrapped client is expected to expose
    ``async list_campaigns_with_analytics() -> list[dict]`` with at minimum
    ``id, name, sent, opens, clicks, replies, open_rate, reply_rate``,
    and optionally ``created_at`` / ``updated_at`` for period filtering.
    """

    def __init__(self, client):
        self.client = client

    @staticmethod
    def _campaign_metric(row: dict, cid: str, end: datetime) -> PerformanceMetric:
        # Shape one campaign analytics row into the common metric form.
        return PerformanceMetric(
            content_id=f"email/{cid}",
            content_type="email",
            title=row.get("name", cid),
            url=None,
            published_at=end,
            primary_metric=float(row.get("reply_rate", 0.0) or 0.0),
            metric_name="reply_rate",
            secondary_metrics={
                "sent": float(row.get("sent", 0) or 0),
                "opens": float(row.get("opens", 0) or 0),
                "clicks": float(row.get("clicks", 0) or 0),
                "replies": float(row.get("replies", 0) or 0),
                "open_rate": float(row.get("open_rate", 0.0) or 0.0),
            },
        )

    async def collect(self, period: Period) -> list[PerformanceMetric]:
        window_start, window_end = period
        try:
            campaigns = await self.client.list_campaigns_with_analytics()
        except Exception as exc:  # noqa: BLE001
            logger.warning("InstantlyCollector failed: %s", exc)
            return []

        results: list[PerformanceMetric] = []
        for row in campaigns:
            cid = row.get("id") or ""
            # Skip id-less rows and rows outside the reporting window.
            if not cid or not _row_in_period(row, window_start, window_end):
                continue
            results.append(self._campaign_metric(row, cid, window_end))
        return results
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class SocialCollector:
    """Reads Echo's ``social_mentions`` table, filters to ``is_own_post=1``,
    emits one metric per post with engagement_score as the primary KPI.

    Returns an empty list (and logs) if the table is missing or the period
    yields no rows. Does not raise.
    """

    # Pinned schema contract: these columns MUST exist on Echo's
    # social_mentions table. If a future Echo migration renames any of
    # these, _verify_schema logs a clear warning and the collector returns
    # [] rather than silently producing partial data.
    _REQUIRED_COLUMNS: frozenset[str] = frozenset(
        {
            "platform",
            "post_id",
            "title",
            "url",
            "posted_at",
            "upvotes",
            "comments",
            "engagement_score",
            "is_own_post",
        }
    )

    def __init__(self, state_db_path: Path):
        # Path to Echo's SQLite state database; may not exist yet.
        self.state_db_path = state_db_path
        # Per-instance cache so _verify_schema only runs PRAGMA once.
        self._schema_verified = False

    def _verify_schema(self, conn: sqlite3.Connection) -> bool:
        """Confirm social_mentions has all required columns.

        Cached per instance via ``self._schema_verified`` so we only PRAGMA
        once. Logs a single clear warning if columns are missing/renamed.
        """
        if self._schema_verified:
            return True
        try:
            # PRAGMA table_info rows: (cid, name, type, notnull, dflt, pk);
            # index 1 is the column name.
            cols = {row[1] for row in conn.execute("PRAGMA table_info(social_mentions)")}
        except sqlite3.OperationalError:
            return False  # table doesn't exist yet
        missing = self._REQUIRED_COLUMNS - cols
        if missing:
            logger.warning(
                "SocialCollector: Echo's social_mentions table is missing "
                "required columns: %s. Argus will return no social metrics "
                "until the schema is updated.",
                sorted(missing),
            )
            return False
        self._schema_verified = True
        return True

    def _read_rows(self, start_iso: str, end_iso: str) -> list[sqlite3.Row] | None:
        """Synchronous SQLite read; called via asyncio.to_thread to avoid
        stalling the event loop. Returns None on any failure (the async
        wrapper translates that to an empty result + logs)."""
        # NOTE(review): sqlite3.connect's context manager commits/rolls back
        # but does NOT close the connection; consider contextlib.closing —
        # TODO confirm whether leaving it to GC is intentional here.
        try:
            with sqlite3.connect(self.state_db_path) as conn:
                # Row factory enables name-based access (row["posted_at"])
                # in collect() below.
                conn.row_factory = sqlite3.Row
                if not self._verify_schema(conn):
                    return None
                return conn.execute(
                    "SELECT platform, post_id, title, url, posted_at, "
                    "upvotes, comments, engagement_score "
                    "FROM social_mentions "
                    "WHERE is_own_post = 1 AND posted_at >= ? AND posted_at <= ?",
                    (start_iso, end_iso),
                ).fetchall()
        except sqlite3.OperationalError as exc:
            # Expected when the DB exists but the table/query isn't ready;
            # INFO because this is a normal cold-start condition.
            logger.info("SocialCollector: %s", exc)
            return None
        except Exception as exc:  # noqa: BLE001
            logger.warning("SocialCollector failed: %s", exc)
            return None

    async def collect(self, period: Period) -> list[PerformanceMetric]:
        start, end = period
        if not self.state_db_path.is_file():
            logger.info("SocialCollector: state.db not present, skipping")
            return []

        # Off-loop read; posted_at is compared as ISO-8601 text in SQL,
        # which assumes Echo stores lexicographically sortable timestamps.
        rows = await asyncio.to_thread(
            self._read_rows,
            start.isoformat(),
            end.isoformat(),
        )
        if rows is None:
            return []

        metrics: list[PerformanceMetric] = []
        for row in rows:
            try:
                posted_at = datetime.fromisoformat(row["posted_at"].replace("Z", "+00:00"))
            except ValueError:
                # Malformed timestamp: keep the row, pin it to the window end.
                posted_at = end
            metrics.append(
                PerformanceMetric(
                    content_id=f"social/{row['platform']}/{row['post_id']}",
                    content_type="social",
                    title=row["title"] or row["post_id"],
                    url=row["url"],
                    published_at=posted_at,
                    primary_metric=float(row["engagement_score"] or 0.0),
                    metric_name="engagement_score",
                    secondary_metrics={
                        "upvotes": float(row["upvotes"] or 0),
                        "comments": float(row["comments"] or 0),
                    },
                )
            )
        return metrics
|