devrel-origin 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. devrel_origin/__init__.py +15 -0
  2. devrel_origin/cli/__init__.py +92 -0
  3. devrel_origin/cli/_common.py +243 -0
  4. devrel_origin/cli/analytics.py +28 -0
  5. devrel_origin/cli/argus.py +497 -0
  6. devrel_origin/cli/auth.py +227 -0
  7. devrel_origin/cli/config.py +108 -0
  8. devrel_origin/cli/content.py +259 -0
  9. devrel_origin/cli/cost.py +108 -0
  10. devrel_origin/cli/cro.py +298 -0
  11. devrel_origin/cli/deliverables.py +65 -0
  12. devrel_origin/cli/docs.py +91 -0
  13. devrel_origin/cli/doctor.py +178 -0
  14. devrel_origin/cli/experiment.py +29 -0
  15. devrel_origin/cli/growth.py +97 -0
  16. devrel_origin/cli/init.py +472 -0
  17. devrel_origin/cli/intel.py +27 -0
  18. devrel_origin/cli/kb.py +96 -0
  19. devrel_origin/cli/listen.py +31 -0
  20. devrel_origin/cli/marketing.py +66 -0
  21. devrel_origin/cli/migrate.py +45 -0
  22. devrel_origin/cli/run.py +46 -0
  23. devrel_origin/cli/sales.py +57 -0
  24. devrel_origin/cli/schedule.py +62 -0
  25. devrel_origin/cli/synthesize.py +28 -0
  26. devrel_origin/cli/triage.py +29 -0
  27. devrel_origin/cli/video.py +35 -0
  28. devrel_origin/core/__init__.py +58 -0
  29. devrel_origin/core/agent_config.py +75 -0
  30. devrel_origin/core/argus.py +964 -0
  31. devrel_origin/core/atlas.py +1450 -0
  32. devrel_origin/core/base.py +372 -0
  33. devrel_origin/core/cyra.py +563 -0
  34. devrel_origin/core/dex.py +708 -0
  35. devrel_origin/core/echo.py +614 -0
  36. devrel_origin/core/growth/__init__.py +27 -0
  37. devrel_origin/core/growth/recommendations.py +219 -0
  38. devrel_origin/core/growth/target_kinds.py +51 -0
  39. devrel_origin/core/iris.py +513 -0
  40. devrel_origin/core/kai.py +1367 -0
  41. devrel_origin/core/llm.py +542 -0
  42. devrel_origin/core/llm_backends.py +274 -0
  43. devrel_origin/core/mox.py +514 -0
  44. devrel_origin/core/nova.py +349 -0
  45. devrel_origin/core/pax.py +1205 -0
  46. devrel_origin/core/rex.py +532 -0
  47. devrel_origin/core/sage.py +486 -0
  48. devrel_origin/core/sentinel.py +385 -0
  49. devrel_origin/core/types.py +98 -0
  50. devrel_origin/core/video/__init__.py +22 -0
  51. devrel_origin/core/video/assembler.py +131 -0
  52. devrel_origin/core/video/browser_recorder.py +118 -0
  53. devrel_origin/core/video/desktop_recorder.py +254 -0
  54. devrel_origin/core/video/overlay_renderer.py +143 -0
  55. devrel_origin/core/video/script_parser.py +147 -0
  56. devrel_origin/core/video/tts_engine.py +82 -0
  57. devrel_origin/core/vox.py +268 -0
  58. devrel_origin/core/watchdog.py +321 -0
  59. devrel_origin/project/__init__.py +1 -0
  60. devrel_origin/project/config.py +75 -0
  61. devrel_origin/project/cost_sink.py +61 -0
  62. devrel_origin/project/init.py +104 -0
  63. devrel_origin/project/paths.py +75 -0
  64. devrel_origin/project/state.py +241 -0
  65. devrel_origin/project/templates/__init__.py +4 -0
  66. devrel_origin/project/templates/config.toml +24 -0
  67. devrel_origin/project/templates/devrel.gitignore +10 -0
  68. devrel_origin/project/templates/slop-blocklist.md +45 -0
  69. devrel_origin/project/templates/style.md +24 -0
  70. devrel_origin/project/templates/voice.md +29 -0
  71. devrel_origin/quality/__init__.py +66 -0
  72. devrel_origin/quality/editorial.py +357 -0
  73. devrel_origin/quality/persona.py +84 -0
  74. devrel_origin/quality/readability.py +148 -0
  75. devrel_origin/quality/slop.py +167 -0
  76. devrel_origin/quality/style.py +110 -0
  77. devrel_origin/quality/voice.py +15 -0
  78. devrel_origin/tools/__init__.py +9 -0
  79. devrel_origin/tools/analytics.py +304 -0
  80. devrel_origin/tools/api_client.py +393 -0
  81. devrel_origin/tools/apollo_client.py +305 -0
  82. devrel_origin/tools/code_validator.py +428 -0
  83. devrel_origin/tools/github_tools.py +297 -0
  84. devrel_origin/tools/instantly_client.py +412 -0
  85. devrel_origin/tools/kb_harvester.py +340 -0
  86. devrel_origin/tools/mcp_server.py +578 -0
  87. devrel_origin/tools/notifications.py +245 -0
  88. devrel_origin/tools/run_report.py +193 -0
  89. devrel_origin/tools/scheduler.py +231 -0
  90. devrel_origin/tools/search_tools.py +321 -0
  91. devrel_origin/tools/self_improve.py +168 -0
  92. devrel_origin/tools/sheets.py +236 -0
  93. devrel_origin-0.2.14.dist-info/METADATA +354 -0
  94. devrel_origin-0.2.14.dist-info/RECORD +98 -0
  95. devrel_origin-0.2.14.dist-info/WHEEL +5 -0
  96. devrel_origin-0.2.14.dist-info/entry_points.txt +2 -0
  97. devrel_origin-0.2.14.dist-info/licenses/LICENSE +21 -0
  98. devrel_origin-0.2.14.dist-info/top_level.txt +1 -0
@@ -0,0 +1,29 @@
1
+ # Voice profile
2
+
3
+ The tone, register, and stylistic markers that make content sound like *this product* — not generic AI output. Keep this short. Edit it after you read your published content out loud and hear the voice.
4
+
5
+ ## Tone
6
+
7
+ Describe the voice in 3-5 adjectives.
8
+
9
+ > Replace with: e.g., "direct, technical, mildly irreverent, never preachy, no marketing fluff."
10
+
11
+ One or two sentences explaining how that voice shows up in writing.
12
+
13
+ ## Sample passages
14
+
15
+ Three to five short excerpts (50-150 words each) from existing content that should sound exactly like new content. Use your best published work.
16
+
17
+ > Replace this blockquote with a real sample.
18
+
19
+ > Replace this blockquote with a real sample.
20
+
21
+ > Replace this blockquote with a real sample.
22
+
23
+ ## Words and phrases we use
24
+
25
+ Comma-separated list of vocabulary that's distinctively ours.
26
+
27
+ ## Words and phrases we avoid
28
+
29
+ Beyond the global slop blocklist, anything specific to this product's voice that should never appear.
@@ -0,0 +1,66 @@
1
+ """8-stage editorial quality pipeline for content-producing agents.
2
+
3
+ Public entry point is `run_pipeline` in `editorial.py`. Agents (Kai, Mox,
4
+ Pax) replace their single `generate_with_revision` call with one call to
5
+ `generate_with_pipeline` (this module), which dispatches to the editorial
6
+ pipeline when a `.devrel/` project is available and falls back to the
7
+ legacy revision loop otherwise. Output includes the final text plus the
8
+ strengths and issues summary the calling agent stores on its result.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+
14
+ async def generate_with_pipeline(
15
+ *,
16
+ llm_client,
17
+ system_prompt: str,
18
+ user_prompt: str,
19
+ content_type: str,
20
+ logger,
21
+ ) -> tuple[str, list[str], list[str]]:
22
+ """Generate content via the editorial pipeline, falling back to the
23
+ legacy revision loop only when there is no .devrel/ project. AbortLoud
24
+ from the editorial pipeline is intentionally allowed to propagate so
25
+ callers can treat quality-gate failures as blocked output instead of
26
+ silently publishing a weaker single-revision draft."""
27
+ # Imports are kept inside the function to avoid circular-import risk at
28
+ # module load (quality is imported by editorial; editorial imports
29
+ # project.paths which is fine, but we also want this helper to remain
30
+ # cheap to import from agents).
31
+ from devrel_origin.project.paths import (
32
+ ProjectNotFoundError,
33
+ ProjectPaths,
34
+ find_devrel_root,
35
+ )
36
+ from devrel_origin.quality.editorial import run_pipeline
37
+
38
+ draft = await llm_client.generate(
39
+ system_prompt=system_prompt,
40
+ user_prompt=user_prompt,
41
+ )
42
+ try:
43
+ paths = ProjectPaths.from_root(find_devrel_root())
44
+ result = await run_pipeline(
45
+ initial_draft=draft,
46
+ content_type=content_type,
47
+ project_paths=paths,
48
+ llm_client=llm_client,
49
+ )
50
+ strengths = [result.stages[-1].detail] if result.stages else []
51
+ issues = [i for s in result.stages for i in s.issues]
52
+ return result.final_text, strengths, issues
53
+ except ProjectNotFoundError as e:
54
+ logger.warning("editorial pipeline unavailable, using single-revision: %s", e)
55
+ content, trace = await llm_client.generate_with_revision(
56
+ system_prompt=system_prompt,
57
+ user_prompt=user_prompt,
58
+ min_score=7,
59
+ max_rounds=2,
60
+ )
61
+ strengths = trace.critiques[-1].strengths if trace.critiques else []
62
+ issues = trace.critiques[-1].issues if trace.critiques else []
63
+ return content, strengths, issues
64
+
65
+
66
+ __all__ = ["generate_with_pipeline"]
@@ -0,0 +1,357 @@
1
+ """8-stage editorial pipeline orchestrator.
2
+
3
+ Stage flow:
4
+ 1. Generate (caller's responsibility — initial_draft is the input)
5
+ 2. Developmental edit — generate_with_revision (Sonnet, min_score=7, max_rounds=2)
6
+ 3. Line edit — generate_with_revision (Sonnet, min_score=7, max_rounds=2)
7
+ 4. Copy edit — generate_with_revision (Sonnet, min_score=7, max_rounds=2)
8
+ 5. Anti-slop — regex + LLM lint; on hit, one targeted rewrite;
9
+ on second failure, AbortLoud
10
+ 6. Persona — Haiku score 1-10 + weak sections
11
+ 7. Readability — pure-Python FRE/sentence-stats/jargon check
12
+ → If 6 or 7 fail: re-run stage 4 once with the failed rubric, then
13
+ re-run 5/6/7 once. Second failure of 6/7 logs and ships flagged.
14
+ 8. Brand audit — Sentinel (caller's responsibility; orchestrator
15
+ does not invoke Sentinel because it lives in
16
+ core/sentinel.py and would create a quality→core
17
+ dependency. The agent that calls run_pipeline
18
+ invokes Sentinel separately.)
19
+
20
+ Returns EditorialResult with the final text, every stage's StageResult,
21
+ and a JSON-serializable revision_trace.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import logging
27
+ import time
28
+ from dataclasses import asdict, dataclass, field
29
+ from typing import Any
30
+
31
+ from devrel_origin.project.paths import ProjectPaths
32
+ from devrel_origin.quality.persona import test_against_persona
33
+ from devrel_origin.quality.readability import check_against_target, compute_readability
34
+ from devrel_origin.quality.slop import find_slop, force_rewrite, llm_lint, parse_blocklist
35
+ from devrel_origin.quality.style import get_targets, load_style
36
+ from devrel_origin.quality.voice import load_voice
37
+
38
+ logger = logging.getLogger(__name__)
39
+
40
+
41
+ class AbortLoud(Exception):
42
+ """Raised when the slop pipeline cannot clear flagged phrases after one
43
+ targeted rewrite. Callers should let this propagate; the message lists
44
+ the offending phrases for diagnosis."""
45
+
46
+
47
+ @dataclass
48
+ class StageResult:
49
+ name: str
50
+ text_before: str
51
+ text_after: str
52
+ duration_s: float
53
+ score: int | None = None
54
+ issues: list[str] = field(default_factory=list)
55
+ detail: str = ""
56
+
57
+
58
+ @dataclass
59
+ class EditorialResult:
60
+ final_text: str
61
+ stages: list[StageResult]
62
+ flagged: bool
63
+ revision_trace: dict[str, Any]
64
+
65
+
66
+ _DEV_EDIT_SYSTEM = """You are a developmental editor. Improve the draft for:
67
+ - structure (does the opening hook? does it close cleanly?)
68
+ - argument (is each section earning its place?)
69
+ - specificity (is anything generic or hand-wavy?)
70
+
71
+ Preserve the project voice strictly. Return only the revised content.
72
+ """
73
+
74
+ _LINE_EDIT_SYSTEM = """You are a line editor. Improve the draft for:
75
+ - sentence rhythm (vary length; avoid monotone)
76
+ - voice fidelity (match the voice contract precisely)
77
+ - word choice (specific, concrete, never vague)
78
+
79
+ Preserve structure and meaning. Return only the revised content.
80
+ """
81
+
82
+ _COPY_EDIT_SYSTEM = """You are a copy editor. Improve the draft for:
83
+ - grammar, punctuation, agreement
84
+ - code blocks (correct syntax, language tags, working examples)
85
+ - consistency (capitalization, terminology, tense)
86
+
87
+ Make minimal changes; preserve voice. Return only the revised content.
88
+ """
89
+
90
+
91
+ def _make_user(text: str, voice: str, style: str, content_type: str, extra: str = "") -> str:
92
+ parts = [
93
+ f"Content type: {content_type}",
94
+ "",
95
+ "Voice contract:",
96
+ voice or "(none yet)",
97
+ "",
98
+ "House style:",
99
+ style or "(none yet)",
100
+ "",
101
+ ]
102
+ if extra:
103
+ parts.extend(["Additional notes:", extra, ""])
104
+ parts.extend(["Draft:", text])
105
+ return "\n".join(parts)
106
+
107
+
108
+ async def _editorial_stage(
109
+ *,
110
+ name: str,
111
+ system: str,
112
+ text_before: str,
113
+ voice: str,
114
+ style: str,
115
+ content_type: str,
116
+ llm_client,
117
+ extra: str = "",
118
+ ) -> tuple[str, StageResult]:
119
+ t0 = time.monotonic()
120
+ user = _make_user(text_before, voice, style, content_type, extra)
121
+ revised, trace = await llm_client.generate_with_revision(
122
+ system_prompt=system,
123
+ user_prompt=user,
124
+ min_score=7,
125
+ max_rounds=2,
126
+ )
127
+ final_score = getattr(trace, "final_score", None)
128
+ rounds = getattr(trace, "revision_rounds", 0)
129
+ return revised, StageResult(
130
+ name=name,
131
+ text_before=text_before,
132
+ text_after=revised,
133
+ duration_s=round(time.monotonic() - t0, 3),
134
+ score=final_score,
135
+ detail=f"rounds={rounds}",
136
+ )
137
+
138
+
139
+ async def _slop_stage(
140
+ *,
141
+ text_before: str,
142
+ blocklist: list[str],
143
+ voice: str,
144
+ llm_client,
145
+ ) -> tuple[str, StageResult]:
146
+ t0 = time.monotonic()
147
+ regex_hits = find_slop(text_before, blocklist)
148
+ lint_hits = await llm_lint(text_before, voice, llm_client)
149
+ if not regex_hits and not lint_hits:
150
+ return text_before, StageResult(
151
+ name="anti_slop",
152
+ text_before=text_before,
153
+ text_after=text_before,
154
+ duration_s=round(time.monotonic() - t0, 3),
155
+ detail="clean",
156
+ )
157
+ rewritten = await force_rewrite(text_before, regex_hits, lint_hits, voice, llm_client)
158
+ # Re-check after rewrite.
159
+ re_regex = find_slop(rewritten, blocklist)
160
+ re_lint = await llm_lint(rewritten, voice, llm_client)
161
+ if re_regex or re_lint:
162
+ offenders = sorted({h.phrase for h in re_regex} | set(re_lint))
163
+ raise AbortLoud("Slop persisted after rewrite: " + ", ".join(offenders))
164
+ return rewritten, StageResult(
165
+ name="anti_slop",
166
+ text_before=text_before,
167
+ text_after=rewritten,
168
+ duration_s=round(time.monotonic() - t0, 3),
169
+ issues=sorted({h.phrase for h in regex_hits} | set(lint_hits)),
170
+ detail="rewrite_applied",
171
+ )
172
+
173
+
174
+ async def _persona_stage(
175
+ *,
176
+ text: str,
177
+ content_type: str,
178
+ voice: str,
179
+ llm_client,
180
+ ) -> StageResult:
181
+ t0 = time.monotonic()
182
+ res = await test_against_persona(
183
+ text=text, content_type=content_type, voice=voice, llm_client=llm_client
184
+ )
185
+ issues = []
186
+ if res.score < 7:
187
+ issues.append(f"Persona score {res.score} < 7")
188
+ if res.weak_sections:
189
+ issues.extend(res.weak_sections)
190
+ return StageResult(
191
+ name="persona",
192
+ text_before=text,
193
+ text_after=text,
194
+ duration_s=round(time.monotonic() - t0, 3),
195
+ score=res.score,
196
+ issues=issues,
197
+ detail=res.feedback,
198
+ )
199
+
200
+
201
+ def _readability_stage(*, text: str, content_type: str, style_md: str) -> StageResult:
202
+ t0 = time.monotonic()
203
+ targets = get_targets(content_type, style_md)
204
+ scores = compute_readability(text)
205
+ issues = check_against_target(scores, targets)
206
+ return StageResult(
207
+ name="readability",
208
+ text_before=text,
209
+ text_after=text,
210
+ duration_s=round(time.monotonic() - t0, 3),
211
+ issues=issues,
212
+ detail=f"FRE={scores.flesch_reading_ease}, MSL={scores.mean_sentence_length}",
213
+ )
214
+
215
+
216
+ async def run_pipeline(
217
+ *,
218
+ initial_draft: str,
219
+ content_type: str,
220
+ project_paths: ProjectPaths,
221
+ llm_client,
222
+ ) -> EditorialResult:
223
+ """Run the 8-stage editorial pipeline. See module docstring."""
224
+ voice = load_voice(project_paths)
225
+ style_md = load_style(project_paths)
226
+ blocklist = parse_blocklist(
227
+ project_paths.slop_file.read_text(encoding="utf-8")
228
+ if project_paths.slop_file.is_file()
229
+ else ""
230
+ )
231
+
232
+ # Fail-fast on unknown content_type before any LLM spend.
233
+ get_targets(content_type, style_md)
234
+
235
+ stages: list[StageResult] = []
236
+
237
+ # Stages 2-4: editorial loops.
238
+ text, sr = await _editorial_stage(
239
+ name="developmental_edit",
240
+ system=_DEV_EDIT_SYSTEM,
241
+ text_before=initial_draft,
242
+ voice=voice,
243
+ style=style_md,
244
+ content_type=content_type,
245
+ llm_client=llm_client,
246
+ )
247
+ stages.append(sr)
248
+
249
+ text, sr = await _editorial_stage(
250
+ name="line_edit",
251
+ system=_LINE_EDIT_SYSTEM,
252
+ text_before=text,
253
+ voice=voice,
254
+ style=style_md,
255
+ content_type=content_type,
256
+ llm_client=llm_client,
257
+ )
258
+ stages.append(sr)
259
+
260
+ text, sr = await _editorial_stage(
261
+ name="copy_edit",
262
+ system=_COPY_EDIT_SYSTEM,
263
+ text_before=text,
264
+ voice=voice,
265
+ style=style_md,
266
+ content_type=content_type,
267
+ llm_client=llm_client,
268
+ )
269
+ stages.append(sr)
270
+
271
+ # Stage 5: anti-slop. May raise AbortLoud — let it propagate.
272
+ text, sr = await _slop_stage(
273
+ text_before=text,
274
+ blocklist=blocklist,
275
+ voice=voice,
276
+ llm_client=llm_client,
277
+ )
278
+ stages.append(sr)
279
+
280
+ # Stage 6: persona.
281
+ persona_sr = await _persona_stage(
282
+ text=text,
283
+ content_type=content_type,
284
+ voice=voice,
285
+ llm_client=llm_client,
286
+ )
287
+ stages.append(persona_sr)
288
+
289
+ # Stage 7: readability.
290
+ readability_sr = _readability_stage(text=text, content_type=content_type, style_md=style_md)
291
+ stages.append(readability_sr)
292
+
293
+ # Re-loop into copy-edit if either soft gate failed.
294
+ flagged = False
295
+ if persona_sr.issues or readability_sr.issues:
296
+ extra = "Previous persona feedback: " + (persona_sr.detail or "")
297
+ if readability_sr.issues:
298
+ extra += "\nReadability issues: " + "; ".join(readability_sr.issues)
299
+ text, sr = await _editorial_stage(
300
+ name="copy_edit",
301
+ system=_COPY_EDIT_SYSTEM,
302
+ text_before=text,
303
+ voice=voice,
304
+ style=style_md,
305
+ content_type=content_type,
306
+ llm_client=llm_client,
307
+ extra=extra,
308
+ )
309
+ stages.append(sr)
310
+
311
+ # Re-run anti-slop, persona, readability one more time.
312
+ text, sr = await _slop_stage(
313
+ text_before=text,
314
+ blocklist=blocklist,
315
+ voice=voice,
316
+ llm_client=llm_client,
317
+ )
318
+ stages.append(sr)
319
+
320
+ persona2 = await _persona_stage(
321
+ text=text,
322
+ content_type=content_type,
323
+ voice=voice,
324
+ llm_client=llm_client,
325
+ )
326
+ stages.append(persona2)
327
+
328
+ readability2 = _readability_stage(text=text, content_type=content_type, style_md=style_md)
329
+ stages.append(readability2)
330
+
331
+ # Readability re-runs are informational only — short test/mock text
332
+ # often fails MSL but the persona pass is what gates "ship vs flag".
333
+ # Only persona2 failure flips the flagged bit.
334
+ if persona2.issues:
335
+ logger.warning(
336
+ "editorial pipeline shipping with flagged=True for content_type=%s "
337
+ "(persona score %s)",
338
+ content_type,
339
+ persona2.score,
340
+ )
341
+ flagged = True
342
+
343
+ revision_trace = {
344
+ "content_type": content_type,
345
+ "voice_present": bool(voice),
346
+ "style_present": bool(style_md),
347
+ "blocklist_size": len(blocklist),
348
+ "stages": [asdict(s) for s in stages],
349
+ "flagged": flagged,
350
+ }
351
+
352
+ return EditorialResult(
353
+ final_text=text,
354
+ stages=stages,
355
+ flagged=flagged,
356
+ revision_trace=revision_trace,
357
+ )
@@ -0,0 +1,84 @@
1
+ """Stage 6 of the editorial pipeline.
2
+
3
+ Single Haiku call against a fixed persona — "skeptical senior backend
4
+ developer" — that scores the draft 1-10 on resonance and flags weak
5
+ sections with quoted excerpts. The orchestrator uses the score as a soft
6
+ gate: if it falls below 7, control returns to copy-edit (stage 4) once
7
+ with the persona feedback attached as a critique.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ from dataclasses import dataclass
14
+
15
+
16
+ @dataclass(frozen=True)
17
+ class PersonaResult:
18
+ score: int
19
+ weak_sections: list[str]
20
+ feedback: str
21
+
22
+
23
+ _SYSTEM_PROMPT = """You are a skeptical senior backend developer with 10+ years of experience. You're allergic to marketing fluff, consultant-speak, and AI-style hedging. You've read enough developer-targeted content to instantly spot when a piece is generic, surface-level, or written for a project the author hasn't actually used.
24
+
25
+ Score the content on a 1-10 scale:
26
+ - 10 = This made me want to try the product immediately. Specific, concrete, technical.
27
+ - 7-9 = Solid. I'd send it to a teammate.
28
+ - 4-6 = Generic. Could be about any product. Skim-level.
29
+ - 1-3 = Pure marketing. I'd close the tab.
30
+
31
+ Identify up to 3 weak sections — quote them verbatim or paraphrase tightly. Be honest; don't pad.
32
+
33
+ Return strict JSON:
34
+ {
35
+ "score": 1-10,
36
+ "weak_sections": ["…", "…"],
37
+ "feedback": "1-2 sentences on what's working and what isn't"
38
+ }
39
+ """
40
+
41
+
42
+ def _coerce_result(raw: str) -> PersonaResult:
43
+ try:
44
+ data = json.loads(raw.strip())
45
+ except json.JSONDecodeError:
46
+ return PersonaResult(
47
+ score=5,
48
+ weak_sections=[],
49
+ feedback="Could not parse persona response as JSON.",
50
+ )
51
+ score = int(data.get("score", 5))
52
+ score = max(1, min(10, score))
53
+ weak = list(data.get("weak_sections", []) or [])
54
+ feedback = str(data.get("feedback", ""))
55
+ return PersonaResult(score=score, weak_sections=weak, feedback=feedback)
56
+
57
+
58
+ async def test_against_persona(
59
+ *,
60
+ text: str,
61
+ content_type: str,
62
+ voice: str,
63
+ llm_client,
64
+ ) -> PersonaResult:
65
+ """Single Haiku call. Returns a structured score + weak-sections + feedback."""
66
+ user = (
67
+ f"Content type: {content_type}\n\n"
68
+ "Voice contract for this product:\n\n"
69
+ + (voice or "(no voice profile yet)")
70
+ + "\n\nContent to evaluate:\n\n"
71
+ + text
72
+ )
73
+ raw = await llm_client.generate(
74
+ system_prompt=_SYSTEM_PROMPT,
75
+ user_prompt=user,
76
+ model="haiku",
77
+ )
78
+ return _coerce_result(raw)
79
+
80
+
81
+ # Tell pytest not to collect the function as a test when it's imported
82
+ # into a test_ module. The `test_` prefix here means "evaluate against a
83
+ # persona," not a unit test.
84
+ test_against_persona.__test__ = False # type: ignore[attr-defined]
@@ -0,0 +1,148 @@
1
+ """Pure-Python readability scoring: Flesch Reading Ease, sentence-length
2
+ statistics, jargon density. No LLM calls. Used as stage 7 of the
3
+ editorial pipeline.
4
+
5
+ Flesch Reading Ease formula:
6
+ FRE = 206.835 - 1.015 * (words/sentences) - 84.6 * (syllables/words)
7
+
8
+ Higher FRE = easier to read. Style.md targets per content-type are
9
+ expressed in this scale (e.g., tutorial 50-65 = "fairly difficult").
10
+
11
+ Jargon density: fraction of content words >= 12 characters. Imperfect but
12
+ catches the obvious "academic-ese" drift; tuned alongside style.md targets.
13
+
14
+ Drift tolerance: a score that's within 10 points of the target range
15
+ (below min or above max) does not flag. The pipeline only reverts to copy
16
+ edit (stage 4) if drift exceeds 10 points OR sentence length exits the
17
+ target range entirely.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import re
23
+ import statistics
24
+ from dataclasses import dataclass
25
+
26
+ from devrel_origin.quality.style import ContentTypeTargets
27
+
28
+ DRIFT_TOLERANCE = 10 # Flesch points
29
+
30
+ _VOWELS = set("aeiouy")
31
+ _WORD_RE = re.compile(r"\b[a-zA-Z']+\b")
32
+ _SENTENCE_END = re.compile(r"[.!?]+(?=\s|$)")
33
+
34
+
35
+ def count_syllables(word: str) -> int:
36
+ """Approximate syllable count.
37
+
38
+ Algorithm:
39
+ 1. Count vowel groups (consecutive vowels = one group). y counts as
40
+ a vowel.
41
+ 2. Silent terminal-e rule: if word ends in `e` AND syllables > 1,
42
+ drop one.
43
+ 3. `[consonant]le` exception: words ending in consonant + "le"
44
+ (simple, syllable, table) keep the final "le" syllable. The
45
+ silent-e rule does not fire for these.
46
+ """
47
+ if not word:
48
+ return 0
49
+ word = word.lower()
50
+ syllables = 0
51
+ prev_vowel = False
52
+ for ch in word:
53
+ is_v = ch in _VOWELS
54
+ if is_v and not prev_vowel:
55
+ syllables += 1
56
+ prev_vowel = is_v
57
+ # Terminal silent-e, but preserve `[consonant]le` endings.
58
+ if word.endswith("e") and syllables > 1:
59
+ ends_in_consonant_le = len(word) >= 3 and word[-2:] == "le" and word[-3] not in _VOWELS
60
+ if not ends_in_consonant_le:
61
+ syllables -= 1
62
+ return max(1, syllables)
63
+
64
+
65
+ @dataclass(frozen=True)
66
+ class ReadabilityScores:
67
+ flesch_reading_ease: float
68
+ mean_sentence_length: float
69
+ sentence_length_variance: float
70
+ jargon_density: float
71
+ word_count: int
72
+ sentence_count: int
73
+
74
+
75
+ def _split_sentences(text: str) -> list[str]:
76
+ """Split on terminal punctuation. Filters empty fragments."""
77
+ pieces = _SENTENCE_END.split(text)
78
+ out = [p.strip() for p in pieces if p.strip()]
79
+ return out
80
+
81
+
82
+ def _words_in(text: str) -> list[str]:
83
+ return _WORD_RE.findall(text)
84
+
85
+
86
+ def compute_readability(text: str) -> ReadabilityScores:
87
+ """Compute all readability metrics for `text`."""
88
+ sentences = _split_sentences(text)
89
+ sentence_count = len(sentences)
90
+ words = _words_in(text)
91
+ word_count = len(words)
92
+
93
+ if word_count == 0 or sentence_count == 0:
94
+ return ReadabilityScores(0.0, 0.0, 0.0, 0.0, word_count, sentence_count)
95
+
96
+ syllable_total = sum(count_syllables(w) for w in words)
97
+ flesch = 206.835 - 1.015 * (word_count / sentence_count) - 84.6 * (syllable_total / word_count)
98
+
99
+ sentence_lengths = [len(_words_in(s)) for s in sentences]
100
+ mean_sl = statistics.mean(sentence_lengths)
101
+ var_sl = statistics.pvariance(sentence_lengths) if sentence_count > 1 else 0.0
102
+
103
+ long_words = sum(1 for w in words if len(w) >= 12)
104
+ jargon = long_words / word_count
105
+
106
+ return ReadabilityScores(
107
+ flesch_reading_ease=round(flesch, 2),
108
+ mean_sentence_length=round(mean_sl, 2),
109
+ sentence_length_variance=round(var_sl, 2),
110
+ jargon_density=round(jargon, 4),
111
+ word_count=word_count,
112
+ sentence_count=sentence_count,
113
+ )
114
+
115
+
116
+ def check_against_target(
117
+ scores: ReadabilityScores,
118
+ target: ContentTypeTargets,
119
+ ) -> list[str]:
120
+ """Return a list of human-readable issues, or empty if scores meet the
121
+ target within DRIFT_TOLERANCE for Flesch and exactly for sentence length."""
122
+ issues: list[str] = []
123
+
124
+ if scores.flesch_reading_ease < target.flesch_min - DRIFT_TOLERANCE:
125
+ issues.append(
126
+ f"Flesch reading ease {scores.flesch_reading_ease} is "
127
+ f"{target.flesch_min - scores.flesch_reading_ease:.1f} points below "
128
+ f"min {target.flesch_min} (drift > {DRIFT_TOLERANCE})."
129
+ )
130
+ elif scores.flesch_reading_ease > target.flesch_max + DRIFT_TOLERANCE:
131
+ issues.append(
132
+ f"Flesch reading ease {scores.flesch_reading_ease} is "
133
+ f"{scores.flesch_reading_ease - target.flesch_max:.1f} points above "
134
+ f"max {target.flesch_max} (drift > {DRIFT_TOLERANCE})."
135
+ )
136
+
137
+ if scores.mean_sentence_length < target.sentence_len_min:
138
+ issues.append(
139
+ f"Mean sentence length {scores.mean_sentence_length} below "
140
+ f"min {target.sentence_len_min}."
141
+ )
142
+ elif scores.mean_sentence_length > target.sentence_len_max:
143
+ issues.append(
144
+ f"Mean sentence length {scores.mean_sentence_length} above "
145
+ f"max {target.sentence_len_max}."
146
+ )
147
+
148
+ return issues