devrel-origin 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. devrel_origin/__init__.py +15 -0
  2. devrel_origin/cli/__init__.py +92 -0
  3. devrel_origin/cli/_common.py +243 -0
  4. devrel_origin/cli/analytics.py +28 -0
  5. devrel_origin/cli/argus.py +497 -0
  6. devrel_origin/cli/auth.py +227 -0
  7. devrel_origin/cli/config.py +108 -0
  8. devrel_origin/cli/content.py +259 -0
  9. devrel_origin/cli/cost.py +108 -0
  10. devrel_origin/cli/cro.py +298 -0
  11. devrel_origin/cli/deliverables.py +65 -0
  12. devrel_origin/cli/docs.py +91 -0
  13. devrel_origin/cli/doctor.py +178 -0
  14. devrel_origin/cli/experiment.py +29 -0
  15. devrel_origin/cli/growth.py +97 -0
  16. devrel_origin/cli/init.py +472 -0
  17. devrel_origin/cli/intel.py +27 -0
  18. devrel_origin/cli/kb.py +96 -0
  19. devrel_origin/cli/listen.py +31 -0
  20. devrel_origin/cli/marketing.py +66 -0
  21. devrel_origin/cli/migrate.py +45 -0
  22. devrel_origin/cli/run.py +46 -0
  23. devrel_origin/cli/sales.py +57 -0
  24. devrel_origin/cli/schedule.py +62 -0
  25. devrel_origin/cli/synthesize.py +28 -0
  26. devrel_origin/cli/triage.py +29 -0
  27. devrel_origin/cli/video.py +35 -0
  28. devrel_origin/core/__init__.py +58 -0
  29. devrel_origin/core/agent_config.py +75 -0
  30. devrel_origin/core/argus.py +964 -0
  31. devrel_origin/core/atlas.py +1450 -0
  32. devrel_origin/core/base.py +372 -0
  33. devrel_origin/core/cyra.py +563 -0
  34. devrel_origin/core/dex.py +708 -0
  35. devrel_origin/core/echo.py +614 -0
  36. devrel_origin/core/growth/__init__.py +27 -0
  37. devrel_origin/core/growth/recommendations.py +219 -0
  38. devrel_origin/core/growth/target_kinds.py +51 -0
  39. devrel_origin/core/iris.py +513 -0
  40. devrel_origin/core/kai.py +1367 -0
  41. devrel_origin/core/llm.py +542 -0
  42. devrel_origin/core/llm_backends.py +274 -0
  43. devrel_origin/core/mox.py +514 -0
  44. devrel_origin/core/nova.py +349 -0
  45. devrel_origin/core/pax.py +1205 -0
  46. devrel_origin/core/rex.py +532 -0
  47. devrel_origin/core/sage.py +486 -0
  48. devrel_origin/core/sentinel.py +385 -0
  49. devrel_origin/core/types.py +98 -0
  50. devrel_origin/core/video/__init__.py +22 -0
  51. devrel_origin/core/video/assembler.py +131 -0
  52. devrel_origin/core/video/browser_recorder.py +118 -0
  53. devrel_origin/core/video/desktop_recorder.py +254 -0
  54. devrel_origin/core/video/overlay_renderer.py +143 -0
  55. devrel_origin/core/video/script_parser.py +147 -0
  56. devrel_origin/core/video/tts_engine.py +82 -0
  57. devrel_origin/core/vox.py +268 -0
  58. devrel_origin/core/watchdog.py +321 -0
  59. devrel_origin/project/__init__.py +1 -0
  60. devrel_origin/project/config.py +75 -0
  61. devrel_origin/project/cost_sink.py +61 -0
  62. devrel_origin/project/init.py +104 -0
  63. devrel_origin/project/paths.py +75 -0
  64. devrel_origin/project/state.py +241 -0
  65. devrel_origin/project/templates/__init__.py +4 -0
  66. devrel_origin/project/templates/config.toml +24 -0
  67. devrel_origin/project/templates/devrel.gitignore +10 -0
  68. devrel_origin/project/templates/slop-blocklist.md +45 -0
  69. devrel_origin/project/templates/style.md +24 -0
  70. devrel_origin/project/templates/voice.md +29 -0
  71. devrel_origin/quality/__init__.py +66 -0
  72. devrel_origin/quality/editorial.py +357 -0
  73. devrel_origin/quality/persona.py +84 -0
  74. devrel_origin/quality/readability.py +148 -0
  75. devrel_origin/quality/slop.py +167 -0
  76. devrel_origin/quality/style.py +110 -0
  77. devrel_origin/quality/voice.py +15 -0
  78. devrel_origin/tools/__init__.py +9 -0
  79. devrel_origin/tools/analytics.py +304 -0
  80. devrel_origin/tools/api_client.py +393 -0
  81. devrel_origin/tools/apollo_client.py +305 -0
  82. devrel_origin/tools/code_validator.py +428 -0
  83. devrel_origin/tools/github_tools.py +297 -0
  84. devrel_origin/tools/instantly_client.py +412 -0
  85. devrel_origin/tools/kb_harvester.py +340 -0
  86. devrel_origin/tools/mcp_server.py +578 -0
  87. devrel_origin/tools/notifications.py +245 -0
  88. devrel_origin/tools/run_report.py +193 -0
  89. devrel_origin/tools/scheduler.py +231 -0
  90. devrel_origin/tools/search_tools.py +321 -0
  91. devrel_origin/tools/self_improve.py +168 -0
  92. devrel_origin/tools/sheets.py +236 -0
  93. devrel_origin-0.2.14.dist-info/METADATA +354 -0
  94. devrel_origin-0.2.14.dist-info/RECORD +98 -0
  95. devrel_origin-0.2.14.dist-info/WHEEL +5 -0
  96. devrel_origin-0.2.14.dist-info/entry_points.txt +2 -0
  97. devrel_origin-0.2.14.dist-info/licenses/LICENSE +21 -0
  98. devrel_origin-0.2.14.dist-info/top_level.txt +1 -0
@@ -0,0 +1,385 @@
1
+ """
2
+ Sentinel — Brand Consistency Auditor Agent
3
+
4
+ Audits all agent outputs for brand voice consistency, messaging alignment,
5
+ ICP accuracy, and content quality. Produces a scored audit report with
6
+ specific remediation recommendations.
7
+ """
8
+
9
+ import json
10
+ import logging
11
+ from dataclasses import dataclass, field
12
+ from pathlib import Path
13
+ from typing import Any, Optional
14
+
15
+ from devrel_origin.core.base import load_agent_prompt, strip_markdown_fences
16
+ from devrel_origin.core.llm import LLMClient
17
+ from devrel_origin.tools.api_client import PostHogClient
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
# Per-agent map of content fields to scan. Each agent stores its prose
# under a different key (Mox under "blog_post", Pax under "body", etc.),
# so Sentinel must check each agent's actual primary field rather than
# assuming a universal "content" key. Order within the list = priority;
# the first non-empty value wins.
# Keys are the context keys looked up by Sentinel._collect_content;
# values are candidate field names, checked in order.
_AGENT_CONTENT_FIELDS: dict[str, list[str]] = {
    "kai_content": ["content", "body"],
    "mox_campaigns": [
        "blog_post",
        "landing_page",
        "social_batch",
        "campaign_brief",
        "content",
    ],
    "pax_sales": ["body", "battle_card", "sequence", "content"],
    "rex_competitive": ["analysis", "summary", "content"],
    "dex_docs": ["architecture", "api_reference", "content"],
    "iris_themes": ["recommendations", "content"],
    "vox_video": ["script", "content"],
    "sage_triage": ["content"],
    "echo_social": ["content"],
}
44
+
45
+
46
def _safe_json_loads(raw: str) -> dict[str, Any]:
    """Parse JSON even when an LLM wraps it in prose or markdown fences.

    First strips markdown code fences and tries a straight parse; on
    failure, retries on the outermost ``{...}`` span. Re-raises the
    original ``json.JSONDecodeError`` when no brace-delimited span
    exists. Non-dict top-level values are normalized to ``{}``.
    """
    text = strip_markdown_fences(raw or "").strip()
    try:
        result = json.loads(text)
    except json.JSONDecodeError:
        # Salvage attempt: parse only the outermost brace-delimited span.
        brace_open = text.find("{")
        brace_close = text.rfind("}")
        if brace_open == -1 or brace_close <= brace_open:
            raise
        result = json.loads(text[brace_open : brace_close + 1])
    if isinstance(result, dict):
        return result
    return {}
59
+
60
+
61
@dataclass
class AuditItem:
    """Audit result for a single content piece.

    NOTE(review): documents the per-item schema of the audit report; the
    audit methods in this module build plain dicts of this shape rather
    than instantiating the dataclass — confirm before relying on
    isinstance checks.
    """

    agent: str  # context key of the producing agent, e.g. "mox_campaigns"
    content_type: str  # field the content came from, e.g. "blog_post"
    score: int  # 1-10
    passed: bool
    issues: list[dict[str, str]] = field(default_factory=list)
    strengths: list[str] = field(default_factory=list)
71
+
72
+
73
@dataclass
class BrandAuditReport:
    """Complete brand consistency audit.

    NOTE(review): documents the top-level report schema (mirrors the JSON
    requested from the LLM in Sentinel._llm_audit); the audit methods
    return plain dicts of this shape rather than this dataclass.
    """

    overall_score: int  # 1-100
    items: list[AuditItem]
    voice_consistency: int  # 1-10
    icp_alignment: int  # 1-10
    messaging_coherence: int  # 1-10
    cross_piece_issues: list[str]
    recommendations: list[str]
84
+
85
+
86
class Sentinel:
    """
    Brand Consistency Auditor agent.

    Capabilities:
    - Audit generated content for brand voice adherence
    - Check ICP alignment across all outputs
    - Verify messaging consistency between agents
    - Score content quality on multiple dimensions
    - Produce remediation recommendations
    """

    # Fallback prompt used when no project-level override file exists
    # (resolved by the SYSTEM_PROMPT property below).
    _DEFAULT_SYSTEM_PROMPT = """You are Sentinel, a brand consistency auditor. \
You review all content produced by the agent system and flag deviations from \
brand standards.

Audit dimensions:
1. VOICE — Developer-authentic, not corporate marketing. No buzzwords, no fluff.
2. ICP ALIGNMENT — Content targets the right audience (DevTools founders, \
engineering leaders, developer advocates).
3. MESSAGING COHERENCE — All pieces tell a consistent story. No contradictions \
between what Kai's tutorial says and Mox's landing page claims.
4. TECHNICAL ACCURACY — Claims are grounded, code examples work, APIs exist.
5. CTA CONSISTENCY — Each piece has one clear CTA appropriate to its funnel stage.
6. FORMATTING — Short paragraphs, clear hierarchy, scannable structure.

Scoring:
- 9-10: Exceptional, publish immediately
- 7-8: Good, minor polish needed
- 5-6: Acceptable with edits
- 3-4: Significant issues, needs rewrite
- 1-2: Off-brand, reject

Be strict. Generic AI slop scores 3-4 regardless of technical accuracy."""

    @property
    def SYSTEM_PROMPT(self) -> str:
        """System prompt, resolved on every access so prompt-file edits take
        effect without reconstructing the agent; falls back to
        ``_DEFAULT_SYSTEM_PROMPT`` when no override file exists."""
        return load_agent_prompt(
            "sentinel",
            "system_prompt.txt",
            self._DEFAULT_SYSTEM_PROMPT,
        )

    def __init__(
        self,
        api_client: PostHogClient,
        knowledge_base_path: Path,
        llm_client: Optional[LLMClient] = None,
    ):
        self.api_client = api_client
        self.knowledge_base_path = knowledge_base_path
        # Optional: when None, execute() degrades to _structural_audit().
        self.llm_client = llm_client

    async def execute(
        self,
        task: str,
        context: Optional[dict[str, Any]] = None,
    ) -> dict[str, Any]:
        """Run brand consistency audit on all generated content.

        Args:
            task: Human-readable description of the audit request.
            context: SharedContext object (anything with ``to_dict()``)
                or a plain mapping holding other agents' outputs keyed
                as in ``_AGENT_CONTENT_FIELDS``.

        Returns:
            Audit report dict; ``status`` is one of ``no_content``,
            ``audited``, ``audit_failed``, or ``audited_structural``.
        """
        logger.info(f"Sentinel executing: {task[:80]}...")

        # Collect all content pieces from context
        pieces = self._collect_content(context)
        if not pieces:
            return {
                "agent": "sentinel",
                "task": task,
                "status": "no_content",
                "overall_score": 0,
                "message": "No content found to audit",
            }

        # Run LLM audit if available
        if self.llm_client:
            return await self._llm_audit(task, pieces)

        # Fallback: basic structural checks
        return self._structural_audit(task, pieces)

    def _collect_content(
        self,
        context: Any,
    ) -> list[dict[str, str]]:
        """Extract all content pieces from SharedContext for auditing.

        Walks ``_AGENT_CONTENT_FIELDS`` and picks the first non-empty
        candidate field per agent. Each agent has a different primary
        field (Mox stores under ``blog_post``, Pax under ``body``, etc.),
        so a universal "content" key would silently audit only Kai.

        Returns:
            One ``{"agent", "content_type", "content"}`` dict per agent
            that produced usable content (string values capped at 5000
            chars; list values joined from their first 3 entries).
        """
        pieces: list[dict[str, str]] = []
        if not context:
            return pieces

        ctx_dict = context.to_dict() if hasattr(context, "to_dict") else dict(context)

        for context_key, candidate_fields in _AGENT_CONTENT_FIELDS.items():
            agent_data = ctx_dict.get(context_key, {})
            if not isinstance(agent_data, dict):
                continue
            for fld in candidate_fields:
                value = agent_data.get(fld)
                if isinstance(value, str) and value.strip():
                    pieces.append(
                        {
                            "agent": context_key,
                            "content_type": fld,
                            # Cap per-piece size so the audit prompt stays bounded.
                            "content": value[:5000],
                        }
                    )
                    break  # one piece per agent
                if isinstance(value, list) and value:
                    joined = "\n\n".join(str(v) for v in value[:3])[:5000]
                    if joined.strip():
                        pieces.append(
                            {
                                "agent": context_key,
                                "content_type": fld,
                                "content": joined,
                            }
                        )
                        break

        return pieces

    async def _llm_audit(
        self,
        task: str,
        pieces: list[dict[str, str]],
    ) -> dict[str, Any]:
        """Run comprehensive LLM-powered brand audit.

        Returns the model's JSON audit merged over the base envelope.
        A non-JSON model response yields ``status="audit_failed"``;
        any other error falls back to ``_structural_audit``.
        """
        pieces_text = ""
        for p in pieces:
            pieces_text += (
                f"\n\n--- [{p['agent'].upper()} — {p['content_type']}] ---\n{p['content']}\n"
            )

        prompt = f"""Audit all content pieces below for brand consistency.

{pieces_text}

For each piece, evaluate:
1. Voice score (1-10): developer-authentic vs marketing fluff
2. ICP alignment (1-10): targets right audience
3. Technical accuracy (1-10): claims grounded, code correct
4. CTA clarity (1-10): one clear next step
5. Formatting (1-10): scannable, short paragraphs

Also evaluate cross-piece consistency:
- Do pieces contradict each other?
- Is the messaging aligned across all agents?
- Are the same features described the same way?

Return JSON:
{{
  "overall_score": <1-100>,
  "voice_consistency": <1-10>,
  "icp_alignment": <1-10>,
  "messaging_coherence": <1-10>,
  "items": [
    {{
      "agent": "...",
      "content_type": "...",
      "score": <1-10>,
      "passed": true/false,
      "issues": [{{"dimension": "...", "severity": "high|medium|low", "detail": "..."}}],
      "strengths": ["..."]
    }}
  ],
  "cross_piece_issues": ["..."],
  "recommendations": ["..."]
}}"""

        raw = ""
        try:
            raw = await self.llm_client.generate(
                system_prompt=self.SYSTEM_PROMPT,
                user_prompt=prompt,
                temperature=0.2,
                max_tokens=4096,
            )
            audit = _safe_json_loads(raw)
            # NOTE: the spread intentionally lets the model's keys (including
            # "status", if it ever returns one) override the envelope.
            return {
                "agent": "sentinel",
                "task": task,
                "status": "audited",
                **audit,
            }
        except json.JSONDecodeError as exc:
            logger.warning(
                "Sentinel LLM audit returned non-JSON response; marking audit_failed. "
                "error=%s raw_head=%r",
                exc,
                (raw or "")[:200],
            )
            logger.debug("Full raw response: %s", raw)
            return {
                "agent": "sentinel",
                "task": task,
                "status": "audit_failed",
                "overall_score": 0,
                "items": [],
                "cross_piece_issues": [],
                "recommendations": ["Retry Sentinel audit; model response was not valid JSON."],
                "error": str(exc),
            }
        except Exception as exc:
            logger.warning(
                "Sentinel LLM audit API error; falling back to structural: %s",
                exc,
            )
            return self._structural_audit(task, pieces)

    def _structural_audit(
        self,
        task: str,
        pieces: list[dict[str, str]],
    ) -> dict[str, Any]:
        """Fallback: basic structural quality checks without LLM.

        Emits the same report keys as the LLM path's JSON schema (and the
        ``BrandAuditReport`` dataclass) — including per-item ``strengths``
        and the three dimension scores — so downstream consumers never
        have to branch on which audit mode produced the result.
        """
        items = []
        total_score = 0

        for p in pieces:
            content = p["content"]
            issues = []
            score = 7  # Start at passing

            # Check paragraph length
            paragraphs = content.split("\n\n")
            long_paras = [pp for pp in paragraphs if len(pp.split()) > 100]
            if long_paras:
                issues.append(
                    {
                        "dimension": "formatting",
                        "severity": "medium",
                        "detail": f"{len(long_paras)} paragraphs exceed 100 words",
                    }
                )
                score -= 1

            # Check for heading structure
            if "## " not in content and "# " not in content:
                issues.append(
                    {
                        "dimension": "formatting",
                        "severity": "medium",
                        "detail": "No heading hierarchy found",
                    }
                )
                score -= 1

            # Check for buzzwords (substring match, so "leverages",
            # "disruption", etc. are caught too).
            buzzwords = [
                "revolutionary",
                "game-changing",
                "cutting-edge",
                "best-in-class",
                "world-class",
                "synergy",
                "leverage",
                "disrupt",
                "paradigm",
            ]
            found_buzzwords = [b for b in buzzwords if b in content.lower()]
            if found_buzzwords:
                issues.append(
                    {
                        "dimension": "voice",
                        "severity": "high",
                        "detail": f"Marketing buzzwords found: {', '.join(found_buzzwords)}",
                    }
                )
                score -= 2

            items.append(
                {
                    "agent": p["agent"],
                    "content_type": p["content_type"],
                    "score": max(1, score),
                    "passed": score >= 6,
                    "issues": issues,
                    # Structural checks only detect problems; no strengths
                    # are claimed, but the key is present for schema parity.
                    "strengths": [],
                }
            )
            total_score += max(1, score)

        # Map item average from 1-7 scale onto 10-100 scale linearly so the
        # structural fallback produces scores comparable to the LLM 1-100 path:
        # item_avg = 1 → 10, item_avg = 4 → 55, item_avg = 7 → 100
        average_item = total_score / max(len(items), 1)
        overall = int(round(((average_item - 1) / 6) * 90 + 10))
        overall = max(0, min(100, overall))
        # Coarse 1-10 proxy for the per-dimension scores: the clamped,
        # rounded average item score. Structural heuristics cannot measure
        # these dimensions individually, but emitting them keeps the report
        # shape identical to the LLM path.
        dimension_score = max(1, min(10, int(round(average_item))))
        return {
            "agent": "sentinel",
            "task": task,
            "status": "audited_structural",
            "overall_score": overall,
            "voice_consistency": dimension_score,
            "icp_alignment": dimension_score,
            "messaging_coherence": dimension_score,
            "items": items,
            "cross_piece_issues": [],
            "recommendations": [],
        }
@@ -0,0 +1,98 @@
1
+ """Typed return values for agent execute() methods."""
2
+
3
+ from typing import NotRequired, TypedDict
4
+
5
+
6
class SageTriageResult(TypedDict):
    """Return value of Sage's triage ``execute()``."""

    agent: str
    status: str
    issues: list[dict]
    total_analyzed: int
    prompt_used: NotRequired[str]
12
+
13
+
14
class EchoSocialResult(TypedDict):
    """Return value of Echo's social-listening ``execute()``."""

    agent: str
    status: str
    brand: str
    top_mentions: list[dict]
    total_mentions: int
    platforms: dict
    sentiment_overall: dict
    engagement_opportunities: list[dict]
    reputation_risks: list[dict]
    prompt_used: NotRequired[str]
25
+
26
+
27
class IrisThemesResult(TypedDict):
    """Return value of Iris's theme-extraction ``execute()``."""

    agent: str
    status: str
    themes: list[dict]
    prompt_used: NotRequired[str]
    content: NotRequired[dict]
33
+
34
+
35
class NovaExperimentResult(TypedDict):
    """Return value of Nova's experiment-design ``execute()``."""

    agent: str
    status: str
    experiments: list[dict]
    prompt_used: NotRequired[str]
40
+
41
+
42
class KaiContentResult(TypedDict):
    """Return value of Kai's content-generation ``execute()``."""

    agent: str
    status: str
    content_type: NotRequired[str]
    prompt_used: NotRequired[str]
    content: NotRequired[dict]
48
+
49
+
50
class RexCompetitiveResult(TypedDict):
    """Return value of Rex's competitive-analysis ``execute()``."""

    agent: str
    status: str
    task: str
    competitors_discovered: list[str]
    kb_sources: list[str]
    web_intel_sources: dict[str, int]
    upstream_social_mentions: NotRequired[int]
    upstream_community_issues: NotRequired[int]
    prompt_used: NotRequired[str]
    content: NotRequired[dict]
61
+
62
+
63
class PaxSalesResult(TypedDict):
    """Return value of Pax's sales-asset ``execute()``."""

    agent: str
    status: str
    asset_type: str
    prompt_used: NotRequired[str]
    content: NotRequired[str]
69
+
70
+
71
class MoxCampaignResult(TypedDict):
    """Return value of Mox's campaign-content ``execute()``."""

    agent: str
    status: str
    content_type: str
    prompt_used: NotRequired[str]
    content: NotRequired[str]
77
+
78
+
79
class InstantlyAnalyticsResult(TypedDict):
    """Aggregated Instantly campaign analytics (totals plus averages)."""

    agent: str
    status: str
    total_campaigns: int
    total_sent: int
    total_opened: int
    total_replied: int
    total_bounced: int
    avg_open_rate: float
    avg_reply_rate: float
    avg_bounce_rate: float
    per_campaign: list[dict]
91
+
92
+
93
class InstantlyRepliesResult(TypedDict):
    """Categorized Instantly reply triage with drafted responses."""

    agent: str
    status: str
    total_replies: int
    categories: dict
    drafts: list[dict]
@@ -0,0 +1,22 @@
1
+ """Video tutorial generation package for Vox agent."""
2
+
3
+ from devrel_origin.core.video.assembler import VideoAssembler
4
+ from devrel_origin.core.video.browser_recorder import BrowserAction, BrowserRecorder
5
+ from devrel_origin.core.video.desktop_recorder import DesktopAction, DesktopRecorder
6
+ from devrel_origin.core.video.overlay_renderer import OverlayConfig, OverlayRenderer
7
+ from devrel_origin.core.video.script_parser import ScriptParser, TutorialStep, VideoTutorial
8
+ from devrel_origin.core.video.tts_engine import TTSEngine
9
+
10
# Public names exported by `from devrel_origin.core.video import *`.
__all__ = [
    "ScriptParser",
    "TutorialStep",
    "VideoTutorial",
    "TTSEngine",
    "BrowserRecorder",
    "BrowserAction",
    "DesktopRecorder",
    "DesktopAction",
    "OverlayRenderer",
    "OverlayConfig",
    "VideoAssembler",
]
@@ -0,0 +1,131 @@
1
+ """
2
+ Video assembler — final FFmpeg pipeline for concatenation and audio merging.
3
+ Concatenates step videos, merges TTS audio tracks per step, outputs final .mp4.
4
+ """
5
+
6
+ import asyncio
7
+ import logging
8
+ from pathlib import Path
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ # Hard cap on FFmpeg subprocess wall-clock time (seconds). 5 minutes is
13
+ # generous for normal merge/concat work but stops a stuck encoder from
14
+ # hanging the whole pipeline.
15
+ FFMPEG_TIMEOUT_S = 300
16
+
17
+
18
+ async def _communicate_with_timeout(process: asyncio.subprocess.Process):
19
+ """Run ``process.communicate()`` with a hard timeout.
20
+
21
+ On timeout: send SIGKILL, await the reaper, then raise RuntimeError so
22
+ the surrounding pipeline error path engages and the caller logs the
23
+ failure instead of waiting indefinitely.
24
+ """
25
+ try:
26
+ return await asyncio.wait_for(process.communicate(), timeout=FFMPEG_TIMEOUT_S)
27
+ except asyncio.TimeoutError as exc:
28
+ process.kill()
29
+ await process.wait()
30
+ raise RuntimeError(
31
+ f"FFmpeg subprocess timed out after {FFMPEG_TIMEOUT_S}s; killed"
32
+ ) from exc
33
+
34
+
35
+ class VideoAssembler:
36
+ def __init__(self, output_dir: Path):
37
+ self.output_dir = Path(output_dir)
38
+ self.output_dir.mkdir(parents=True, exist_ok=True)
39
+
40
+ async def assemble(
41
+ self,
42
+ step_videos: list[Path],
43
+ step_audios: list[Path],
44
+ output_filename: str = "tutorial.mp4",
45
+ ) -> Path:
46
+ if len(step_videos) != len(step_audios):
47
+ raise ValueError(f"Mismatch: {len(step_videos)} videos vs {len(step_audios)} audios")
48
+ merged_steps = []
49
+ for i, (video, audio) in enumerate(zip(step_videos, step_audios, strict=True)):
50
+ merged_path = self.output_dir / f"merged_step_{i + 1}.mp4"
51
+ await self._merge_audio_video(video, audio, merged_path)
52
+ merged_steps.append(merged_path)
53
+ final_path = self.output_dir / output_filename
54
+ if len(merged_steps) == 1:
55
+ merged_steps[0].rename(final_path)
56
+ else:
57
+ await self._concatenate_videos(merged_steps, final_path)
58
+ for p in merged_steps:
59
+ if p.exists():
60
+ p.unlink()
61
+ logger.info(f"Final video assembled: {final_path}")
62
+ return final_path
63
+
64
+ async def _merge_audio_video(
65
+ self, video_path: Path, audio_path: Path, output_path: Path
66
+ ) -> None:
67
+ cmd = self._build_audio_merge_cmd(video_path, audio_path, output_path)
68
+ logger.info(f"Merging audio+video: {output_path.name}")
69
+ process = await asyncio.create_subprocess_exec(
70
+ *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
71
+ )
72
+ _, stderr = await _communicate_with_timeout(process)
73
+ if process.returncode != 0:
74
+ err_text = stderr.decode()
75
+ logger.error(f"FFmpeg merge failed: {err_text[:500]}")
76
+ raise RuntimeError(f"Audio/video merge failed: {err_text[:200]}")
77
+
78
+ async def _concatenate_videos(self, video_paths: list[Path], output_path: Path) -> None:
79
+ concat_file = self.output_dir / "concat_list.txt"
80
+ concat_file.write_text(self._build_concat_file_content(video_paths))
81
+ cmd = [
82
+ "ffmpeg",
83
+ "-y",
84
+ "-f",
85
+ "concat",
86
+ "-safe",
87
+ "0",
88
+ "-i",
89
+ str(concat_file),
90
+ "-c",
91
+ "copy",
92
+ str(output_path),
93
+ ]
94
+ logger.info(f"Concatenating {len(video_paths)} steps")
95
+ process = await asyncio.create_subprocess_exec(
96
+ *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
97
+ )
98
+ _, stderr = await _communicate_with_timeout(process)
99
+ concat_file.unlink(missing_ok=True)
100
+ if process.returncode != 0:
101
+ err_text = stderr.decode()
102
+ logger.error(f"FFmpeg concat failed: {err_text[:500]}")
103
+ raise RuntimeError(f"Video concatenation failed: {err_text[:200]}")
104
+
105
+ def _build_concat_file_content(self, video_paths: list[Path]) -> str:
106
+ lines = [f"file '{path}'" for path in video_paths]
107
+ return "\n".join(lines)
108
+
109
+ def _build_audio_merge_cmd(
110
+ self, video_path: Path, audio_path: Path, output_path: Path
111
+ ) -> list[str]:
112
+ return [
113
+ "ffmpeg",
114
+ "-y",
115
+ "-i",
116
+ str(video_path),
117
+ "-i",
118
+ str(audio_path),
119
+ "-c:v",
120
+ "copy",
121
+ "-c:a",
122
+ "aac",
123
+ "-b:a",
124
+ "192k",
125
+ "-map",
126
+ "0:v:0",
127
+ "-map",
128
+ "1:a:0",
129
+ "-shortest",
130
+ str(output_path),
131
+ ]