devrel-origin 0.2.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devrel_origin/__init__.py +15 -0
- devrel_origin/cli/__init__.py +92 -0
- devrel_origin/cli/_common.py +243 -0
- devrel_origin/cli/analytics.py +28 -0
- devrel_origin/cli/argus.py +497 -0
- devrel_origin/cli/auth.py +227 -0
- devrel_origin/cli/config.py +108 -0
- devrel_origin/cli/content.py +259 -0
- devrel_origin/cli/cost.py +108 -0
- devrel_origin/cli/cro.py +298 -0
- devrel_origin/cli/deliverables.py +65 -0
- devrel_origin/cli/docs.py +91 -0
- devrel_origin/cli/doctor.py +178 -0
- devrel_origin/cli/experiment.py +29 -0
- devrel_origin/cli/growth.py +97 -0
- devrel_origin/cli/init.py +472 -0
- devrel_origin/cli/intel.py +27 -0
- devrel_origin/cli/kb.py +96 -0
- devrel_origin/cli/listen.py +31 -0
- devrel_origin/cli/marketing.py +66 -0
- devrel_origin/cli/migrate.py +45 -0
- devrel_origin/cli/run.py +46 -0
- devrel_origin/cli/sales.py +57 -0
- devrel_origin/cli/schedule.py +62 -0
- devrel_origin/cli/synthesize.py +28 -0
- devrel_origin/cli/triage.py +29 -0
- devrel_origin/cli/video.py +35 -0
- devrel_origin/core/__init__.py +58 -0
- devrel_origin/core/agent_config.py +75 -0
- devrel_origin/core/argus.py +964 -0
- devrel_origin/core/atlas.py +1450 -0
- devrel_origin/core/base.py +372 -0
- devrel_origin/core/cyra.py +563 -0
- devrel_origin/core/dex.py +708 -0
- devrel_origin/core/echo.py +614 -0
- devrel_origin/core/growth/__init__.py +27 -0
- devrel_origin/core/growth/recommendations.py +219 -0
- devrel_origin/core/growth/target_kinds.py +51 -0
- devrel_origin/core/iris.py +513 -0
- devrel_origin/core/kai.py +1367 -0
- devrel_origin/core/llm.py +542 -0
- devrel_origin/core/llm_backends.py +274 -0
- devrel_origin/core/mox.py +514 -0
- devrel_origin/core/nova.py +349 -0
- devrel_origin/core/pax.py +1205 -0
- devrel_origin/core/rex.py +532 -0
- devrel_origin/core/sage.py +486 -0
- devrel_origin/core/sentinel.py +385 -0
- devrel_origin/core/types.py +98 -0
- devrel_origin/core/video/__init__.py +22 -0
- devrel_origin/core/video/assembler.py +131 -0
- devrel_origin/core/video/browser_recorder.py +118 -0
- devrel_origin/core/video/desktop_recorder.py +254 -0
- devrel_origin/core/video/overlay_renderer.py +143 -0
- devrel_origin/core/video/script_parser.py +147 -0
- devrel_origin/core/video/tts_engine.py +82 -0
- devrel_origin/core/vox.py +268 -0
- devrel_origin/core/watchdog.py +321 -0
- devrel_origin/project/__init__.py +1 -0
- devrel_origin/project/config.py +75 -0
- devrel_origin/project/cost_sink.py +61 -0
- devrel_origin/project/init.py +104 -0
- devrel_origin/project/paths.py +75 -0
- devrel_origin/project/state.py +241 -0
- devrel_origin/project/templates/__init__.py +4 -0
- devrel_origin/project/templates/config.toml +24 -0
- devrel_origin/project/templates/devrel.gitignore +10 -0
- devrel_origin/project/templates/slop-blocklist.md +45 -0
- devrel_origin/project/templates/style.md +24 -0
- devrel_origin/project/templates/voice.md +29 -0
- devrel_origin/quality/__init__.py +66 -0
- devrel_origin/quality/editorial.py +357 -0
- devrel_origin/quality/persona.py +84 -0
- devrel_origin/quality/readability.py +148 -0
- devrel_origin/quality/slop.py +167 -0
- devrel_origin/quality/style.py +110 -0
- devrel_origin/quality/voice.py +15 -0
- devrel_origin/tools/__init__.py +9 -0
- devrel_origin/tools/analytics.py +304 -0
- devrel_origin/tools/api_client.py +393 -0
- devrel_origin/tools/apollo_client.py +305 -0
- devrel_origin/tools/code_validator.py +428 -0
- devrel_origin/tools/github_tools.py +297 -0
- devrel_origin/tools/instantly_client.py +412 -0
- devrel_origin/tools/kb_harvester.py +340 -0
- devrel_origin/tools/mcp_server.py +578 -0
- devrel_origin/tools/notifications.py +245 -0
- devrel_origin/tools/run_report.py +193 -0
- devrel_origin/tools/scheduler.py +231 -0
- devrel_origin/tools/search_tools.py +321 -0
- devrel_origin/tools/self_improve.py +168 -0
- devrel_origin/tools/sheets.py +236 -0
- devrel_origin-0.2.14.dist-info/METADATA +354 -0
- devrel_origin-0.2.14.dist-info/RECORD +98 -0
- devrel_origin-0.2.14.dist-info/WHEEL +5 -0
- devrel_origin-0.2.14.dist-info/entry_points.txt +2 -0
- devrel_origin-0.2.14.dist-info/licenses/LICENSE +21 -0
- devrel_origin-0.2.14.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Voice profile
|
|
2
|
+
|
|
3
|
+
The tone, register, and stylistic markers that make content sound like *this product* — not generic AI output. Keep this short. Edit it after you read your published content out loud and hear the voice.
|
|
4
|
+
|
|
5
|
+
## Tone
|
|
6
|
+
|
|
7
|
+
Describe the voice in 3-5 adjectives.
|
|
8
|
+
|
|
9
|
+
> Replace with: e.g., "direct, technical, mildly irreverent, never preachy, no marketing fluff."
|
|
10
|
+
|
|
11
|
+
One or two sentences explaining how that voice shows up in writing.
|
|
12
|
+
|
|
13
|
+
## Sample passages
|
|
14
|
+
|
|
15
|
+
Three to five short excerpts (50-150 words each) from existing content that should sound exactly like new content. Use your best published work.
|
|
16
|
+
|
|
17
|
+
> Replace this blockquote with a real sample.
|
|
18
|
+
|
|
19
|
+
> Replace this blockquote with a real sample.
|
|
20
|
+
|
|
21
|
+
> Replace this blockquote with a real sample.
|
|
22
|
+
|
|
23
|
+
## Words and phrases we use
|
|
24
|
+
|
|
25
|
+
Comma-separated list of vocabulary that's distinctively ours.
|
|
26
|
+
|
|
27
|
+
## Words and phrases we avoid
|
|
28
|
+
|
|
29
|
+
Beyond the global slop blocklist, list anything specific to this product's voice that should never appear.
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""8-stage editorial quality pipeline for content-producing agents.
|
|
2
|
+
|
|
3
|
+
Public entry point is `run_pipeline` in `editorial.py`. Agents (Kai, Mox,
|
|
4
|
+
Pax) replace their single `generate_with_revision` call with one call to
|
|
5
|
+
`generate_with_pipeline` (this module), which dispatches to the editorial
|
|
6
|
+
pipeline when a `.devrel/` project is available and falls back to the
|
|
7
|
+
legacy revision loop otherwise. Output includes the final text plus the
|
|
8
|
+
strengths and issues summary the calling agent stores on its result.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
async def generate_with_pipeline(
    *,
    llm_client,
    system_prompt: str,
    user_prompt: str,
    content_type: str,
    logger,
) -> tuple[str, list[str], list[str]]:
    """Produce content through the editorial pipeline when a `.devrel/`
    project exists; otherwise fall back to the legacy single-revision loop.

    Returns `(final_text, strengths, issues)`. AbortLoud raised inside the
    editorial pipeline is deliberately not caught here — a hard quality-gate
    failure should block the output rather than silently downgrade it to a
    weaker single-revision draft.
    """
    # Deferred imports keep this helper cheap to import from agent modules
    # and avoid circular-import risk at module load time.
    from devrel_origin.project.paths import (
        ProjectNotFoundError,
        ProjectPaths,
        find_devrel_root,
    )
    from devrel_origin.quality.editorial import run_pipeline

    first_draft = await llm_client.generate(
        system_prompt=system_prompt,
        user_prompt=user_prompt,
    )
    try:
        project = ProjectPaths.from_root(find_devrel_root())
        pipeline_result = await run_pipeline(
            initial_draft=first_draft,
            content_type=content_type,
            project_paths=project,
            llm_client=llm_client,
        )
    except ProjectNotFoundError as err:
        # No .devrel/ project found: legacy single-revision fallback.
        logger.warning("editorial pipeline unavailable, using single-revision: %s", err)
        content, trace = await llm_client.generate_with_revision(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            min_score=7,
            max_rounds=2,
        )
        if trace.critiques:
            last_critique = trace.critiques[-1]
            return content, last_critique.strengths, last_critique.issues
        return content, [], []

    # Summarize: last stage's detail as the single "strength", all stage
    # issues flattened in order.
    collected_issues: list[str] = []
    for stage in pipeline_result.stages:
        collected_issues.extend(stage.issues)
    strengths = [pipeline_result.stages[-1].detail] if pipeline_result.stages else []
    return pipeline_result.final_text, strengths, collected_issues
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
__all__ = ["generate_with_pipeline"]
|
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
"""8-stage editorial pipeline orchestrator.
|
|
2
|
+
|
|
3
|
+
Stage flow:
|
|
4
|
+
1. Generate (caller's responsibility — initial_draft is the input)
|
|
5
|
+
2. Developmental edit — generate_with_revision (Sonnet, min_score=7, max_rounds=2)
|
|
6
|
+
3. Line edit — generate_with_revision (Sonnet, min_score=7, max_rounds=2)
|
|
7
|
+
4. Copy edit — generate_with_revision (Sonnet, min_score=7, max_rounds=2)
|
|
8
|
+
5. Anti-slop — regex + LLM lint; on hit, one targeted rewrite;
|
|
9
|
+
on second failure, AbortLoud
|
|
10
|
+
6. Persona — Haiku score 1-10 + weak sections
|
|
11
|
+
7. Readability — pure-Python FRE/sentence-stats/jargon check
|
|
12
|
+
→ If 6 or 7 fail: re-run stage 4 once with the failed rubric, then
|
|
13
|
+
re-run 5/6/7 once. Second failure of 6/7 logs and ships flagged.
|
|
14
|
+
8. Brand audit — Sentinel (caller's responsibility; orchestrator
|
|
15
|
+
does not invoke Sentinel because it lives in
|
|
16
|
+
core/sentinel.py and would create a quality→core
|
|
17
|
+
dependency. The agent that calls run_pipeline
|
|
18
|
+
invokes Sentinel separately.)
|
|
19
|
+
|
|
20
|
+
Returns EditorialResult with the final text, every stage's StageResult,
|
|
21
|
+
and a JSON-serializable revision_trace.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import logging
|
|
27
|
+
import time
|
|
28
|
+
from dataclasses import asdict, dataclass, field
|
|
29
|
+
from typing import Any
|
|
30
|
+
|
|
31
|
+
from devrel_origin.project.paths import ProjectPaths
|
|
32
|
+
from devrel_origin.quality.persona import test_against_persona
|
|
33
|
+
from devrel_origin.quality.readability import check_against_target, compute_readability
|
|
34
|
+
from devrel_origin.quality.slop import find_slop, force_rewrite, llm_lint, parse_blocklist
|
|
35
|
+
from devrel_origin.quality.style import get_targets, load_style
|
|
36
|
+
from devrel_origin.quality.voice import load_voice
|
|
37
|
+
|
|
38
|
+
logger = logging.getLogger(__name__)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class AbortLoud(Exception):
    """Hard quality-gate failure: slop phrases survived the one allowed
    targeted rewrite. Deliberately left uncaught by the pipeline so callers
    treat the output as blocked; the exception message enumerates the
    phrases that could not be cleared, for diagnosis."""
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass
class StageResult:
    """Snapshot of one editorial pipeline stage: the text it received and
    produced, how long it took, and any score or issues it raised."""

    # Stage identifier, e.g. "developmental_edit", "anti_slop", "persona".
    name: str
    # Text handed to the stage.
    text_before: str
    # Text the stage produced; equal to text_before for check-only stages.
    text_after: str
    # Wall-clock duration in seconds (rounded to 3 decimals by callers).
    duration_s: float
    # 1-10 score where the stage produces one (editorial/persona); else None.
    score: int | None = None
    # Human-readable problems found; empty means the stage passed its gate.
    issues: list[str] = field(default_factory=list)
    # Free-form note, e.g. "rounds=2", "clean", or persona feedback text.
    detail: str = ""
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass
class EditorialResult:
    """Final output of run_pipeline."""

    # Text after the last stage that modified it.
    final_text: str
    # Every StageResult in execution order, including soft-gate re-runs.
    stages: list[StageResult]
    # True when the final persona pass still reported issues (ships flagged).
    flagged: bool
    # JSON-serializable summary: content_type, input presence, stage dumps.
    revision_trace: dict[str, Any]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
_DEV_EDIT_SYSTEM = """You are a developmental editor. Improve the draft for:
|
|
67
|
+
- structure (does the opening hook? does it close cleanly?)
|
|
68
|
+
- argument (is each section earning its place?)
|
|
69
|
+
- specificity (is anything generic or hand-wavy?)
|
|
70
|
+
|
|
71
|
+
Preserve the project voice strictly. Return only the revised content.
|
|
72
|
+
"""
|
|
73
|
+
|
|
74
|
+
_LINE_EDIT_SYSTEM = """You are a line editor. Improve the draft for:
|
|
75
|
+
- sentence rhythm (vary length; avoid monotone)
|
|
76
|
+
- voice fidelity (match the voice contract precisely)
|
|
77
|
+
- word choice (specific, concrete, never vague)
|
|
78
|
+
|
|
79
|
+
Preserve structure and meaning. Return only the revised content.
|
|
80
|
+
"""
|
|
81
|
+
|
|
82
|
+
_COPY_EDIT_SYSTEM = """You are a copy editor. Improve the draft for:
|
|
83
|
+
- grammar, punctuation, agreement
|
|
84
|
+
- code blocks (correct syntax, language tags, working examples)
|
|
85
|
+
- consistency (capitalization, terminology, tense)
|
|
86
|
+
|
|
87
|
+
Make minimal changes; preserve voice. Return only the revised content.
|
|
88
|
+
"""
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _make_user(text: str, voice: str, style: str, content_type: str, extra: str = "") -> str:
|
|
92
|
+
parts = [
|
|
93
|
+
f"Content type: {content_type}",
|
|
94
|
+
"",
|
|
95
|
+
"Voice contract:",
|
|
96
|
+
voice or "(none yet)",
|
|
97
|
+
"",
|
|
98
|
+
"House style:",
|
|
99
|
+
style or "(none yet)",
|
|
100
|
+
"",
|
|
101
|
+
]
|
|
102
|
+
if extra:
|
|
103
|
+
parts.extend(["Additional notes:", extra, ""])
|
|
104
|
+
parts.extend(["Draft:", text])
|
|
105
|
+
return "\n".join(parts)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
async def _editorial_stage(
    *,
    name: str,
    system: str,
    text_before: str,
    voice: str,
    style: str,
    content_type: str,
    llm_client,
    extra: str = "",
) -> tuple[str, StageResult]:
    """Run one generate_with_revision loop (min_score=7, max_rounds=2) under
    the given editor system prompt. Returns the revised text plus a
    StageResult recording score, rounds, and timing."""
    started = time.monotonic()
    revised, trace = await llm_client.generate_with_revision(
        system_prompt=system,
        user_prompt=_make_user(text_before, voice, style, content_type, extra),
        min_score=7,
        max_rounds=2,
    )
    # Trace attributes are read defensively — the client's trace object
    # shape is not guaranteed here.
    stage = StageResult(
        name=name,
        text_before=text_before,
        text_after=revised,
        duration_s=round(time.monotonic() - started, 3),
        score=getattr(trace, "final_score", None),
        detail=f"rounds={getattr(trace, 'revision_rounds', 0)}",
    )
    return revised, stage
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
async def _slop_stage(
    *,
    text_before: str,
    blocklist: list[str],
    voice: str,
    llm_client,
) -> tuple[str, StageResult]:
    """Stage 5: regex blocklist + LLM slop lint, with one targeted rewrite
    on a hit.

    Raises AbortLoud when flagged phrases survive the rewrite — a second
    failure is a hard gate, never shipped silently."""
    started = time.monotonic()

    def _stage(final_text: str, flagged_phrases: list[str], note: str) -> StageResult:
        # Shared constructor so the clean and rewritten paths report the
        # same shape.
        return StageResult(
            name="anti_slop",
            text_before=text_before,
            text_after=final_text,
            duration_s=round(time.monotonic() - started, 3),
            issues=flagged_phrases,
            detail=note,
        )

    first_regex = find_slop(text_before, blocklist)
    first_lint = await llm_lint(text_before, voice, llm_client)
    if not first_regex and not first_lint:
        return text_before, _stage(text_before, [], "clean")

    rewritten = await force_rewrite(text_before, first_regex, first_lint, voice, llm_client)
    # The rewrite gets exactly one chance; verify it actually cleared things.
    second_regex = find_slop(rewritten, blocklist)
    second_lint = await llm_lint(rewritten, voice, llm_client)
    if second_regex or second_lint:
        survivors = sorted({h.phrase for h in second_regex} | set(second_lint))
        raise AbortLoud("Slop persisted after rewrite: " + ", ".join(survivors))

    cleared = sorted({h.phrase for h in first_regex} | set(first_lint))
    return rewritten, _stage(rewritten, cleared, "rewrite_applied")
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
async def _persona_stage(
    *,
    text: str,
    content_type: str,
    voice: str,
    llm_client,
) -> StageResult:
    """Stage 6: score the draft against the skeptical-developer persona.

    Text passes through unchanged; a score below 7 and/or any weak section
    becomes an issue for the orchestrator's soft gate."""
    started = time.monotonic()
    verdict = await test_against_persona(
        text=text, content_type=content_type, voice=voice, llm_client=llm_client
    )
    problems: list[str] = []
    if verdict.score < 7:
        problems.append(f"Persona score {verdict.score} < 7")
    problems.extend(verdict.weak_sections or [])
    return StageResult(
        name="persona",
        text_before=text,
        text_after=text,
        duration_s=round(time.monotonic() - started, 3),
        score=verdict.score,
        issues=problems,
        detail=verdict.feedback,
    )
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _readability_stage(*, text: str, content_type: str, style_md: str) -> StageResult:
    """Stage 7: pure-Python readability check against the style.md targets
    for this content type. Never rewrites text; out-of-range metrics become
    issues for the orchestrator's soft gate."""
    started = time.monotonic()
    scores = compute_readability(text)
    problems = check_against_target(scores, get_targets(content_type, style_md))
    return StageResult(
        name="readability",
        text_before=text,
        text_after=text,
        duration_s=round(time.monotonic() - started, 3),
        issues=problems,
        detail=f"FRE={scores.flesch_reading_ease}, MSL={scores.mean_sentence_length}",
    )
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
async def run_pipeline(
    *,
    initial_draft: str,
    content_type: str,
    project_paths: ProjectPaths,
    llm_client,
) -> EditorialResult:
    """Run the 8-stage editorial pipeline over `initial_draft`.

    Stages 2-4 (developmental, line, copy edit) each run one
    generate_with_revision loop. Stage 5 (anti-slop) may raise AbortLoud,
    which propagates. Stages 6 (persona) and 7 (readability) are soft
    gates: if either reports issues, copy edit is re-run once with the
    failing feedback attached, followed by one more anti-slop / persona /
    readability pass. Only a second persona failure sets flagged=True;
    the readability re-run is informational. Stage 8 (brand audit) is the
    caller's responsibility — see module docstring.

    Args:
        initial_draft: Stage-1 output produced by the caller.
        content_type: Key into the style.md targets; unknown types raise
            before any LLM spend.
        project_paths: Locations of voice.md, style.md, and the slop
            blocklist for this project.
        llm_client: Client exposing generate / generate_with_revision.

    Returns:
        EditorialResult with the final text, every StageResult in order
        (including re-runs), the flagged bit, and a JSON-serializable
        revision_trace.

    Raises:
        AbortLoud: when slop survives its one allowed targeted rewrite.
    """
    voice = load_voice(project_paths)
    style_md = load_style(project_paths)
    # A missing blocklist file degrades to an empty blocklist, not an error.
    blocklist = parse_blocklist(
        project_paths.slop_file.read_text(encoding="utf-8")
        if project_paths.slop_file.is_file()
        else ""
    )

    # Fail-fast on unknown content_type before any LLM spend.
    get_targets(content_type, style_md)

    stages: list[StageResult] = []

    # Stages 2-4: editorial loops.
    text, sr = await _editorial_stage(
        name="developmental_edit",
        system=_DEV_EDIT_SYSTEM,
        text_before=initial_draft,
        voice=voice,
        style=style_md,
        content_type=content_type,
        llm_client=llm_client,
    )
    stages.append(sr)

    text, sr = await _editorial_stage(
        name="line_edit",
        system=_LINE_EDIT_SYSTEM,
        text_before=text,
        voice=voice,
        style=style_md,
        content_type=content_type,
        llm_client=llm_client,
    )
    stages.append(sr)

    text, sr = await _editorial_stage(
        name="copy_edit",
        system=_COPY_EDIT_SYSTEM,
        text_before=text,
        voice=voice,
        style=style_md,
        content_type=content_type,
        llm_client=llm_client,
    )
    stages.append(sr)

    # Stage 5: anti-slop. May raise AbortLoud — let it propagate.
    text, sr = await _slop_stage(
        text_before=text,
        blocklist=blocklist,
        voice=voice,
        llm_client=llm_client,
    )
    stages.append(sr)

    # Stage 6: persona.
    persona_sr = await _persona_stage(
        text=text,
        content_type=content_type,
        voice=voice,
        llm_client=llm_client,
    )
    stages.append(persona_sr)

    # Stage 7: readability.
    readability_sr = _readability_stage(text=text, content_type=content_type, style_md=style_md)
    stages.append(readability_sr)

    # Re-loop into copy-edit (once) if either soft gate failed, carrying the
    # failing rubric as extra critique context.
    flagged = False
    if persona_sr.issues or readability_sr.issues:
        extra = "Previous persona feedback: " + (persona_sr.detail or "")
        if readability_sr.issues:
            extra += "\nReadability issues: " + "; ".join(readability_sr.issues)
        text, sr = await _editorial_stage(
            name="copy_edit",
            system=_COPY_EDIT_SYSTEM,
            text_before=text,
            voice=voice,
            style=style_md,
            content_type=content_type,
            llm_client=llm_client,
            extra=extra,
        )
        stages.append(sr)

        # Re-run anti-slop, persona, readability one more time.
        text, sr = await _slop_stage(
            text_before=text,
            blocklist=blocklist,
            voice=voice,
            llm_client=llm_client,
        )
        stages.append(sr)

        persona2 = await _persona_stage(
            text=text,
            content_type=content_type,
            voice=voice,
            llm_client=llm_client,
        )
        stages.append(persona2)

        readability2 = _readability_stage(text=text, content_type=content_type, style_md=style_md)
        stages.append(readability2)

        # Readability re-runs are informational only — short test/mock text
        # often fails MSL but the persona pass is what gates "ship vs flag".
        # Only persona2 failure flips the flagged bit.
        if persona2.issues:
            logger.warning(
                "editorial pipeline shipping with flagged=True for content_type=%s "
                "(persona score %s)",
                content_type,
                persona2.score,
            )
            flagged = True

    revision_trace = {
        "content_type": content_type,
        "voice_present": bool(voice),
        "style_present": bool(style_md),
        "blocklist_size": len(blocklist),
        "stages": [asdict(s) for s in stages],
        "flagged": flagged,
    }

    return EditorialResult(
        final_text=text,
        stages=stages,
        flagged=flagged,
        revision_trace=revision_trace,
    )
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Stage 6 of the editorial pipeline.
|
|
2
|
+
|
|
3
|
+
Single Haiku call against a fixed persona — "skeptical senior backend
|
|
4
|
+
developer" — that scores the draft 1-10 on resonance and flags weak
|
|
5
|
+
sections with quoted excerpts. The orchestrator uses the score as a soft
|
|
6
|
+
gate: if it falls below 7, control returns to copy-edit (stage 4) once
|
|
7
|
+
with the persona feedback attached as a critique.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass(frozen=True)
class PersonaResult:
    """Structured verdict from the persona evaluation call."""

    # 1-10 resonance score (clamped into range by _coerce_result).
    score: int
    # Up to 3 quoted or tightly-paraphrased weak excerpts from the draft.
    weak_sections: list[str]
    # 1-2 sentence summary of what's working and what isn't.
    feedback: str
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
_SYSTEM_PROMPT = """You are a skeptical senior backend developer with 10+ years of experience. You're allergic to marketing fluff, consultant-speak, and AI-style hedging. You've read enough developer-targeted content to instantly spot when a piece is generic, surface-level, or written for a project the author hasn't actually used.
|
|
24
|
+
|
|
25
|
+
Score the content on a 1-10 scale:
|
|
26
|
+
- 10 = This made me want to try the product immediately. Specific, concrete, technical.
|
|
27
|
+
- 7-9 = Solid. I'd send it to a teammate.
|
|
28
|
+
- 4-6 = Generic. Could be about any product. Skim-level.
|
|
29
|
+
- 1-3 = Pure marketing. I'd close the tab.
|
|
30
|
+
|
|
31
|
+
Identify up to 3 weak sections — quote them verbatim or paraphrase tightly. Be honest; don't pad.
|
|
32
|
+
|
|
33
|
+
Return strict JSON:
|
|
34
|
+
{
|
|
35
|
+
"score": 1-10,
|
|
36
|
+
"weak_sections": ["…", "…"],
|
|
37
|
+
"feedback": "1-2 sentences on what's working and what isn't"
|
|
38
|
+
}
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _coerce_result(raw: str) -> PersonaResult:
    """Parse the persona model's raw reply into a PersonaResult.

    Tolerant of malformed model output: invalid JSON, a JSON payload that
    is not an object (json.loads can legally return a list, string, or
    number), a non-numeric "score", or a non-list "weak_sections" all
    degrade to a neutral fallback instead of raising, so one bad model
    reply cannot crash the pipeline. Scores are clamped to the 1-10 rubric.
    """
    try:
        data = json.loads(raw.strip())
    except json.JSONDecodeError:
        return PersonaResult(
            score=5,
            weak_sections=[],
            feedback="Could not parse persona response as JSON.",
        )
    # Guard: previously a non-dict payload crashed on data.get (AttributeError).
    if not isinstance(data, dict):
        return PersonaResult(
            score=5,
            weak_sections=[],
            feedback="Persona response was valid JSON but not an object.",
        )
    try:
        score = int(data.get("score", 5))
    except (TypeError, ValueError):
        # e.g. "score": "high" or null — fall back to neutral.
        score = 5
    score = max(1, min(10, score))
    raw_weak = data.get("weak_sections") or []
    # list(int) would raise; only sequence-shaped payloads are accepted.
    weak = list(raw_weak) if isinstance(raw_weak, (list, tuple)) else []
    feedback = str(data.get("feedback", ""))
    return PersonaResult(score=score, weak_sections=weak, feedback=feedback)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
async def test_against_persona(
    *,
    text: str,
    content_type: str,
    voice: str,
    llm_client,
) -> PersonaResult:
    """One Haiku evaluation of `text` against the fixed skeptical-developer
    persona. Returns the parsed score, weak sections, and feedback."""
    prompt_sections = [
        f"Content type: {content_type}",
        "Voice contract for this product:",
        voice or "(no voice profile yet)",
        "Content to evaluate:",
        text,
    ]
    raw = await llm_client.generate(
        system_prompt=_SYSTEM_PROMPT,
        user_prompt="\n\n".join(prompt_sections),
        model="haiku",
    )
    return _coerce_result(raw)


# Not a unit test: the `test_` prefix means "evaluate against a persona."
# This attribute stops pytest from collecting the function when it is
# imported into a test_ module.
test_against_persona.__test__ = False  # type: ignore[attr-defined]
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Pure-Python readability scoring: Flesch Reading Ease, sentence-length
|
|
2
|
+
statistics, jargon density. No LLM calls. Used as stage 7 of the
|
|
3
|
+
editorial pipeline.
|
|
4
|
+
|
|
5
|
+
Flesch Reading Ease formula:
|
|
6
|
+
FRE = 206.835 - 1.015 * (words/sentences) - 84.6 * (syllables/words)
|
|
7
|
+
|
|
8
|
+
Higher FRE = easier to read. Style.md targets per content-type are
|
|
9
|
+
expressed in this scale (e.g., tutorial 50-65 = "fairly difficult").
|
|
10
|
+
|
|
11
|
+
Jargon density: fraction of content words >= 12 characters. Imperfect but
|
|
12
|
+
catches the obvious "academic-ese" drift; tuned alongside style.md targets.
|
|
13
|
+
|
|
14
|
+
Drift tolerance: a score that's within 10 points of the target range
|
|
15
|
+
(below min or above max) does not flag. The pipeline only reverts to copy
|
|
16
|
+
edit (stage 4) if drift exceeds 10 points OR sentence length exits the
|
|
17
|
+
target range entirely.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import re
|
|
23
|
+
import statistics
|
|
24
|
+
from dataclasses import dataclass
|
|
25
|
+
|
|
26
|
+
from devrel_origin.quality.style import ContentTypeTargets
|
|
27
|
+
|
|
28
|
+
DRIFT_TOLERANCE = 10 # Flesch points
|
|
29
|
+
|
|
30
|
+
_VOWELS = set("aeiouy")
|
|
31
|
+
_WORD_RE = re.compile(r"\b[a-zA-Z']+\b")
|
|
32
|
+
_SENTENCE_END = re.compile(r"[.!?]+(?=\s|$)")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def count_syllables(word: str) -> int:
    """Heuristic syllable count for one word (0 for the empty string,
    otherwise at least 1).

    Each run of consecutive vowels (a, e, i, o, u, y) counts as one
    syllable. A trailing silent "e" is then dropped — unless the word ends
    in consonant + "le" (table, simple, syllable), where the final "le"
    syllable is real and kept.
    """
    if not word:
        return 0
    lowered = word.lower()
    count = 0
    previous_was_vowel = False
    for ch in lowered:
        currently_vowel = ch in _VOWELS
        if currently_vowel and not previous_was_vowel:
            count += 1
        previous_was_vowel = currently_vowel
    # Terminal silent-e rule, with the `[consonant]le` carve-out.
    if lowered.endswith("e") and count > 1:
        keeps_le = (
            len(lowered) >= 3
            and lowered.endswith("le")
            and lowered[-3] not in _VOWELS
        )
        if not keeps_le:
            count -= 1
    return max(1, count)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass(frozen=True)
class ReadabilityScores:
    """Metrics produced by compute_readability. All zeros when the text
    contains no words or no sentences."""

    # 206.835 - 1.015*(words/sentences) - 84.6*(syllables/words); higher = easier.
    flesch_reading_ease: float
    # Mean words per sentence.
    mean_sentence_length: float
    # Population variance of words-per-sentence.
    sentence_length_variance: float
    # Fraction of words with >= 12 characters.
    jargon_density: float
    word_count: int
    sentence_count: int
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _split_sentences(text: str) -> list[str]:
    """Break text into sentences at runs of terminal punctuation,
    discarding whitespace-only fragments."""
    return [fragment.strip() for fragment in _SENTENCE_END.split(text) if fragment.strip()]
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _words_in(text: str) -> list[str]:
    """All word tokens (letters and apostrophes) in reading order."""
    return list(_WORD_RE.findall(text))
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def compute_readability(text: str) -> ReadabilityScores:
    """Score `text` on every readability metric.

    Returns an all-zero ReadabilityScores (with the raw counts) when the
    text has no words or no detectable sentences."""
    sentences = _split_sentences(text)
    words = _words_in(text)
    n_sentences = len(sentences)
    n_words = len(words)

    if not words or not sentences:
        return ReadabilityScores(0.0, 0.0, 0.0, 0.0, n_words, n_sentences)

    total_syllables = sum(count_syllables(w) for w in words)
    fre = (
        206.835
        - 1.015 * (n_words / n_sentences)
        - 84.6 * (total_syllables / n_words)
    )

    per_sentence_lengths = [len(_words_in(s)) for s in sentences]
    mean_len = statistics.mean(per_sentence_lengths)
    # Population variance; a single sentence has no spread by definition.
    variance = statistics.pvariance(per_sentence_lengths) if n_sentences > 1 else 0.0

    # Jargon proxy: fraction of words of 12+ characters.
    density = sum(1 for w in words if len(w) >= 12) / n_words

    return ReadabilityScores(
        flesch_reading_ease=round(fre, 2),
        mean_sentence_length=round(mean_len, 2),
        sentence_length_variance=round(variance, 2),
        jargon_density=round(density, 4),
        word_count=n_words,
        sentence_count=n_sentences,
    )
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def check_against_target(
    scores: ReadabilityScores,
    target: ContentTypeTargets,
) -> list[str]:
    """Compare computed scores to this content type's targets.

    Flesch gets DRIFT_TOLERANCE points of slack on either side of the
    target range; mean sentence length must land inside its range exactly.
    Returns human-readable issue strings — empty means within target."""
    problems: list[str] = []

    fre = scores.flesch_reading_ease
    if fre < target.flesch_min - DRIFT_TOLERANCE:
        problems.append(
            f"Flesch reading ease {fre} is "
            f"{target.flesch_min - fre:.1f} points below "
            f"min {target.flesch_min} (drift > {DRIFT_TOLERANCE})."
        )
    elif fre > target.flesch_max + DRIFT_TOLERANCE:
        problems.append(
            f"Flesch reading ease {fre} is "
            f"{fre - target.flesch_max:.1f} points above "
            f"max {target.flesch_max} (drift > {DRIFT_TOLERANCE})."
        )

    msl = scores.mean_sentence_length
    if msl < target.sentence_len_min:
        problems.append(
            f"Mean sentence length {msl} below "
            f"min {target.sentence_len_min}."
        )
    elif msl > target.sentence_len_max:
        problems.append(
            f"Mean sentence length {msl} above "
            f"max {target.sentence_len_max}."
        )

    return problems
|