learnx-cli 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. learnx_cli-0.3.0.dist-info/METADATA +240 -0
  2. learnx_cli-0.3.0.dist-info/RECORD +131 -0
  3. learnx_cli-0.3.0.dist-info/WHEEL +4 -0
  4. learnx_cli-0.3.0.dist-info/entry_points.txt +2 -0
  5. tutor/.env copy.example +4 -0
  6. tutor/__init__.py +0 -0
  7. tutor/__main__.py +4 -0
  8. tutor/assets/__init__.py +5 -0
  9. tutor/assets/html/fonts/Inter-Bold.woff2 +0 -0
  10. tutor/assets/html/fonts/Inter-Regular.woff2 +0 -0
  11. tutor/assets/html/fonts/Inter-SemiBold.woff2 +0 -0
  12. tutor/assets/html/fonts/JetBrainsMono-Regular.woff2 +0 -0
  13. tutor/assets/html/highlight-java.min.js +2 -0
  14. tutor/assets/html/highlight-javascript.min.js +2 -0
  15. tutor/assets/html/highlight-python.min.js +2 -0
  16. tutor/assets/html/highlight.min.js +17 -0
  17. tutor/assets/html/mermaid.min.js +31 -0
  18. tutor/assets/html/slide_base.css +464 -0
  19. tutor/assets/html/theme-learnx-dark.css +12 -0
  20. tutor/audio/__init__.py +0 -0
  21. tutor/audio/audio_builder.py +143 -0
  22. tutor/audio/sanitizer.py +9 -0
  23. tutor/audio/tts_renderer.py +54 -0
  24. tutor/cli/__init__.py +0 -0
  25. tutor/cli/commands.py +391 -0
  26. tutor/cli/logo.py +21 -0
  27. tutor/cli/playback_commands.py +239 -0
  28. tutor/cli/shell.py +91 -0
  29. tutor/cli/shell_context.py +18 -0
  30. tutor/cli/theme.py +39 -0
  31. tutor/cli/video_commands.py +123 -0
  32. tutor/config.py +122 -0
  33. tutor/conftest.py +5 -0
  34. tutor/constants.py +82 -0
  35. tutor/exceptions.py +26 -0
  36. tutor/generation/__init__.py +0 -0
  37. tutor/generation/assembler.py +81 -0
  38. tutor/generation/curriculum.py +97 -0
  39. tutor/generation/dialogue.py +172 -0
  40. tutor/generation/narrator.py +122 -0
  41. tutor/generation/segment_parser.py +223 -0
  42. tutor/generation/segment_planner.py +200 -0
  43. tutor/generation/visual_planner.py +205 -0
  44. tutor/infra/__init__.py +0 -0
  45. tutor/infra/llm.py +152 -0
  46. tutor/ingestion/__init__.py +0 -0
  47. tutor/ingestion/chunker.py +171 -0
  48. tutor/ingestion/doc_analyzer.py +41 -0
  49. tutor/ingestion/parse_content.py +19 -0
  50. tutor/ingestion/summarizer.py +51 -0
  51. tutor/inspector.py +117 -0
  52. tutor/llm_config.toml +58 -0
  53. tutor/models.py +147 -0
  54. tutor/player/__init__.py +0 -0
  55. tutor/player/input_handler.py +45 -0
  56. tutor/player/player.py +308 -0
  57. tutor/player/player_display.py +117 -0
  58. tutor/prompts/curriculum.txt +67 -0
  59. tutor/prompts/dialogue.txt +62 -0
  60. tutor/prompts/narrate.txt +34 -0
  61. tutor/prompts/qa.txt +17 -0
  62. tutor/prompts/summarize.txt +9 -0
  63. tutor/prompts/visual.txt +60 -0
  64. tutor/prompts/visual_v3.txt +91 -0
  65. tutor/qa/__init__.py +0 -0
  66. tutor/qa/qa.py +105 -0
  67. tutor/requirements-dev.txt +2 -0
  68. tutor/requirements.txt +12 -0
  69. tutor/sample_docs/headingless_large.md +1 -0
  70. tutor/sample_docs/headingless_test.md +1 -0
  71. tutor/sample_docs/java-basics.md +78 -0
  72. tutor/tests/__init__.py +0 -0
  73. tutor/tests/audio/__init__.py +0 -0
  74. tutor/tests/audio/test_audio_builder.py +106 -0
  75. tutor/tests/audio/test_sanitizer.py +41 -0
  76. tutor/tests/cli/__init__.py +0 -0
  77. tutor/tests/cli/test_commands.py +67 -0
  78. tutor/tests/cli/test_video_commands.py +190 -0
  79. tutor/tests/e2e/README.md +61 -0
  80. tutor/tests/e2e/__init__.py +0 -0
  81. tutor/tests/e2e/conftest.py +117 -0
  82. tutor/tests/e2e/fixtures/README.md +17 -0
  83. tutor/tests/e2e/fixtures/sample.md +13 -0
  84. tutor/tests/e2e/test_audio_quality.py +40 -0
  85. tutor/tests/e2e/test_av_sync.py +56 -0
  86. tutor/tests/e2e/test_pipeline_smoke.py +37 -0
  87. tutor/tests/e2e/test_slide_render.py +72 -0
  88. tutor/tests/e2e/test_video_streams.py +104 -0
  89. tutor/tests/generation/__init__.py +0 -0
  90. tutor/tests/generation/conftest.py +134 -0
  91. tutor/tests/generation/test_assembler.py +64 -0
  92. tutor/tests/generation/test_curriculum.py +107 -0
  93. tutor/tests/generation/test_narrator.py +165 -0
  94. tutor/tests/generation/test_segment_edge_cases.py +280 -0
  95. tutor/tests/generation/test_segment_planner.py +324 -0
  96. tutor/tests/generation/test_visual_planner.py +319 -0
  97. tutor/tests/ingestion/__init__.py +0 -0
  98. tutor/tests/ingestion/test_chunker.py +94 -0
  99. tutor/tests/ingestion/test_doc_analyzer.py +51 -0
  100. tutor/tests/player/__init__.py +0 -0
  101. tutor/tests/player/test_player_states.py +88 -0
  102. tutor/tests/test_assets.py +39 -0
  103. tutor/tests/test_models_visual.py +180 -0
  104. tutor/tests/visual/__init__.py +0 -0
  105. tutor/tests/visual/test_beat_timer.py +321 -0
  106. tutor/tests/visual/test_pipeline_integration.py +178 -0
  107. tutor/tests/visual/test_slide_renderer.py +298 -0
  108. tutor/tests/visual/test_subtitle_writer.py +165 -0
  109. tutor/tests/visual/test_video_assembler.py +108 -0
  110. tutor/tests/visual/test_visual_pipeline.py +270 -0
  111. tutor/tutor.py +365 -0
  112. tutor/visual/__init__.py +213 -0
  113. tutor/visual/beat_timer.py +222 -0
  114. tutor/visual/slide_renderer.py +236 -0
  115. tutor/visual/subtitle_writer.py +187 -0
  116. tutor/visual/templates/_base.html.j2 +40 -0
  117. tutor/visual/templates/analogy.html.j2 +21 -0
  118. tutor/visual/templates/callout.html.j2 +10 -0
  119. tutor/visual/templates/code_example.html.j2 +12 -0
  120. tutor/visual/templates/comparison.html.j2 +28 -0
  121. tutor/visual/templates/decision_guide.html.j2 +37 -0
  122. tutor/visual/templates/definition.html.j2 +13 -0
  123. tutor/visual/templates/diagram.html.j2 +11 -0
  124. tutor/visual/templates/hook_question.html.j2 +17 -0
  125. tutor/visual/templates/key_insight.html.j2 +9 -0
  126. tutor/visual/templates/memory_hook.html.j2 +7 -0
  127. tutor/visual/templates/outro.html.j2 +16 -0
  128. tutor/visual/templates/question_prompt.html.j2 +13 -0
  129. tutor/visual/templates/step_sequence.html.j2 +14 -0
  130. tutor/visual/templates/title_card.html.j2 +12 -0
  131. tutor/visual/video_assembler.py +299 -0
@@ -0,0 +1,172 @@
1
+ import hashlib
2
+ import json
3
+ import logging
4
+ import re
5
+ from pathlib import Path
6
+
7
+ from tutor.constants import PROMPT_VERSION, SUMMARY_CACHE_DIR
8
+ from tutor.exceptions import LLMError
9
+ from tutor.infra.llm import LIMITS, LLMFn, load_prompt
10
+ from tutor.models import Chunk, DialogueLine, TeachingUnit
11
+
12
+ log = logging.getLogger(__name__)
13
+
14
+
15
def generate(
    unit: TeachingUnit,
    source_chunks: list[Chunk],
    fmt: str,
    llm_fn: LLMFn,
    difficulty: str = "beginner",
    cache_dir: str = SUMMARY_CACHE_DIR,
) -> list[DialogueLine]:
    """Return the dialogue lines for one teaching unit, using an on-disk cache.

    Args:
        unit: The teaching unit whose fields are serialised into the prompt.
        source_chunks: Candidate source chunks; those whose ``chunk_id`` is
            listed in ``unit.source_sections`` are sent to the LLM, falling
            back to the first two chunks when none match.
        fmt: Dialogue format. ``"dual-tutor"`` restricts speakers to ALEX and
            SAM; any other value restricts them to ALEX and MAYA.
        llm_fn: Callable invoked as ``llm_fn(messages, call_type="dialogue")``.
        difficulty: Within this function it only contributes to the cache key.
        cache_dir: Directory holding ``<md5>.dialogue.json`` cache files.

    Returns:
        The parsed, speaker-normalised dialogue lines.

    Raises:
        LLMError: If fewer than four lines can be parsed after one retry, or
            if the speakers do not fit ``fmt``.
    """
    cache_key = hashlib.md5(
        (unit.concept + str(unit.word_budget) + fmt + difficulty + PROMPT_VERSION).encode()
    ).hexdigest()
    cache_file = Path(cache_dir) / f"{cache_key}.dialogue.json"

    if cache_file.exists():
        # A truncated or hand-edited cache file must not block generation:
        # treat anything unreadable as a cache miss and regenerate below.
        try:
            raw_lines = json.loads(cache_file.read_text(encoding="utf-8"))
            cached = [DialogueLine(**d) for d in raw_lines]
        except (ValueError, TypeError) as exc:
            log.warning("Ignoring unreadable dialogue cache %s: %s", cache_file, exc)
        else:
            log.debug("Cache hit for dialogue unit %d (%s)", unit.unit, unit.concept)
            return cached

    relevant = [c for c in source_chunks if c.chunk_id in unit.source_sections]
    if not relevant:
        # No chunk matched the unit's declared sections; use the first two.
        relevant = source_chunks[:2]
    source_text = "\n\n".join(f"## {c.heading}\n{c.text}" for c in relevant)
    source_text = _truncate_source(source_text, LIMITS["max_source_tokens"])

    unit_json = json.dumps(
        {
            "concept": unit.concept,
            "complexity": unit.complexity,
            "word_budget": unit.word_budget,
            "key_facts": unit.key_facts,
            "common_misconception": unit.common_misconception,
            "good_analogy": unit.good_analogy,
            "js_contrast": unit.js_contrast,
            "question_style": unit.question_style,
            "memory_hook": unit.memory_hook,
            "prerequisite_concepts": unit.prerequisite_concepts,
            "production_relevance": unit.production_relevance,
        },
        indent=2,
    )

    speaker_constraint = (
        "IMPORTANT: Only use ALEX and SAM speakers. Do NOT use MAYA."
        if fmt == "dual-tutor"
        else "IMPORTANT: Only use ALEX and MAYA speakers. Do NOT use SAM."
    )
    system_prompt = (
        load_prompt("dialogue.txt").format(
            format=fmt,
            word_budget=unit.word_budget,
        )
        + f"\n\n{speaker_constraint}"
    )

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Unit:\n{unit_json}\n\nSource:\n{source_text}"},
    ]

    log.info("Generating dialogue for unit %d: %s", unit.unit, unit.concept)
    raw = llm_fn(messages, call_type="dialogue")
    lines = _parse_dialogue(raw, unit.unit)

    if len(lines) < 4:
        # One retry with the same messages before giving up.
        log.warning("Only %d lines parsed, retrying dialogue generation", len(lines))
        raw = llm_fn(messages, call_type="dialogue")
        lines = _parse_dialogue(raw, unit.unit)
        if len(lines) < 4:
            raise LLMError(
                f"Dialogue generation returned fewer than 4 lines for unit {unit.unit}: {unit.concept}"
            )

    lines = _normalize_speakers(lines, fmt)
    _validate_speakers(lines, fmt)

    # Only validated dialogue reaches the cache.
    Path(cache_dir).mkdir(parents=True, exist_ok=True)
    cache_file.write_text(
        json.dumps(
            [
                {"speaker": ln.speaker, "text": ln.text, "unit_number": ln.unit_number}
                for ln in lines
            ]
        ),
        encoding="utf-8",
    )

    return lines
102
+
103
+
104
+ def _truncate_source(text: str, max_tokens: int) -> str:
105
+ words = text.split()
106
+ max_words = int(max_tokens / 1.3)
107
+ if len(words) <= max_words:
108
+ return text
109
+ log.warning(
110
+ "Source text truncated from %d to %d words for context limit", len(words), max_words
111
+ )
112
+ return " ".join(words[:max_words])
113
+
114
+
115
+ def _parse_dialogue_line(raw_line: str, unit_number: int) -> DialogueLine | None:
116
+ match = re.match(r"^(ALEX|MAYA|SAM)\s*[:\-]\s*(.+)", raw_line.strip(), re.IGNORECASE)
117
+ if not match:
118
+ return None
119
+ return DialogueLine(
120
+ speaker=match.group(1).upper(),
121
+ text=match.group(2).strip(),
122
+ unit_number=unit_number,
123
+ )
124
+
125
+
126
+ def _normalize_speakers(lines: list[DialogueLine], fmt: str) -> list[DialogueLine]:
127
+ """Remap speakers so the output matches the requested format."""
128
+ if fmt == "dual-tutor":
129
+ return [
130
+ DialogueLine(
131
+ speaker="SAM" if ln.speaker == "MAYA" else ln.speaker,
132
+ text=ln.text,
133
+ unit_number=ln.unit_number,
134
+ )
135
+ for ln in lines
136
+ ]
137
+ return [
138
+ DialogueLine(
139
+ speaker="MAYA" if ln.speaker == "SAM" else ln.speaker,
140
+ text=ln.text,
141
+ unit_number=ln.unit_number,
142
+ )
143
+ for ln in lines
144
+ ]
145
+
146
+
147
+ def _validate_speakers(lines: list[DialogueLine], fmt: str) -> None:
148
+ speakers = {line.speaker for line in lines}
149
+ if fmt == "tutor-student":
150
+ if "ALEX" not in speakers:
151
+ raise LLMError("tutor-student dialogue missing ALEX lines")
152
+ if "SAM" in speakers:
153
+ raise LLMError("tutor-student dialogue contains SAM — wrong format")
154
+ elif fmt == "dual-tutor":
155
+ if "MAYA" in speakers:
156
+ raise LLMError("dual-tutor dialogue contains MAYA — wrong format")
157
+ expected = {"ALEX", "SAM"}
158
+ if not expected.issubset(speakers):
159
+ raise LLMError(f"dual-tutor dialogue missing speakers: {expected - speakers}")
160
+
161
+
162
+ def _parse_dialogue(raw: str, unit_number: int) -> list[DialogueLine]:
163
+ lines: list[DialogueLine] = []
164
+ for raw_line in raw.split("\n"):
165
+ if not raw_line.strip():
166
+ continue
167
+ parsed = _parse_dialogue_line(raw_line, unit_number)
168
+ if parsed:
169
+ lines.append(parsed)
170
+ else:
171
+ log.debug("Skipping unparseable line: %s", raw_line[:80])
172
+ return lines
@@ -0,0 +1,122 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import json
5
+ import logging
6
+ import re
7
+ from pathlib import Path
8
+
9
+ from tutor.audio import sanitizer
10
+ from tutor.constants import SUMMARY_CACHE_DIR
11
+ from tutor.infra.llm import LLMFn, load_prompt
12
+ from tutor.models import Chunk, DialogueLine, TeachingUnit
13
+
14
+ log = logging.getLogger(__name__)
15
+
16
+ NARRATE_VERSION = "narrate_v1"
17
+ _WORDS_PER_SOURCE_WORD = 1.25
18
+
19
+
20
def narrate_all(
    chunks: list[Chunk],
    doc_title: str,
    llm_fn: LLMFn,
    cache_dir: str = SUMMARY_CACHE_DIR,
) -> tuple[list[TeachingUnit], list[list[DialogueLine]]]:
    """Narrate each chunk in the order given.

    Sections are numbered from 1. For every chunk the narration is produced
    first, then the matching TeachingUnit is built.

    Returns:
        ``(units, all_lines)`` — two parallel lists with one entry per chunk.
    """
    section_count = len(chunks)
    units: list[TeachingUnit] = []
    all_lines: list[list[DialogueLine]] = []

    for position, chunk in enumerate(chunks, start=1):
        narrated = _narrate_chunk(
            chunk, position, section_count, doc_title, llm_fn, cache_dir
        )
        units.append(_chunk_to_unit(chunk, position))
        all_lines.append(narrated)

    return units, all_lines
37
+
38
+
39
def _chunk_to_unit(chunk: Chunk, unit_index: int) -> TeachingUnit:
    """Build the TeachingUnit that stands for a single narrated chunk.

    The concept is the chunk heading, or ``"Section <unit_index>"`` when the
    heading is empty. The word budget is the chunk's word count scaled by
    ``_WORDS_PER_SOURCE_WORD``, with a floor of 100. The remaining teaching
    fields are left empty, with complexity 1 and question style ``"recall"``.
    """
    source_word_count = len(chunk.text.split())
    scaled_budget = int(source_word_count * _WORDS_PER_SOURCE_WORD)
    title = chunk.heading if chunk.heading else f"Section {unit_index}"
    return TeachingUnit(
        unit=unit_index,
        concept=title,
        source_sections=[chunk.chunk_id],
        complexity=1,
        word_budget=max(100, scaled_budget),
        key_facts=[],
        common_misconception="",
        good_analogy="",
        question_style="recall",
        memory_hook="",
    )
53
+
54
+
55
def _narrate_chunk(
    chunk: Chunk,
    section_index: int,
    total_sections: int,
    doc_title: str,
    llm_fn: LLMFn,
    cache_dir: str,
) -> list[DialogueLine]:
    """Narrate one chunk, reading from and writing to the on-disk cache.

    The cache key is the MD5 of the chunk id, the chunk text and
    ``NARRATE_VERSION``. On a miss the ``narrate.txt`` prompt is sent through
    ``llm_fn`` (retried once if nothing parses), each line's text is passed
    through ``sanitizer.apply`` and the result is cached.

    Args:
        chunk: The source chunk to narrate.
        section_index: 1-based position used in the prompt and as the
            ``unit_number`` of the produced lines.
        total_sections: Total number of sections, used in the prompt and logs.
        doc_title: Document title inserted into the prompt.
        llm_fn: Callable invoked as ``llm_fn(messages, call_type="dialogue")``.
        cache_dir: Directory holding ``<md5>.narrate.json`` cache files.

    Returns:
        The sanitised narration lines. The list is empty when neither attempt
        produced a parseable line; such a result is not written to the cache.
    """
    cache_key = hashlib.md5((chunk.chunk_id + chunk.text + NARRATE_VERSION).encode()).hexdigest()
    cache_file = Path(cache_dir) / f"{cache_key}.narrate.json"

    if cache_file.exists():
        log.debug(
            "Cache hit for narration %d/%d (%s)", section_index, total_sections, chunk.heading
        )
        raw = json.loads(cache_file.read_text(encoding="utf-8"))
        return [DialogueLine(**d) for d in raw]

    word_budget = max(100, int(len(chunk.text.split()) * _WORDS_PER_SOURCE_WORD))
    prompt = load_prompt("narrate.txt").format(
        doc_title=doc_title,
        section_index=section_index,
        total_sections=total_sections,
        heading=chunk.heading,
        word_budget=word_budget,
        section_text=chunk.text,
    )

    log.info("Narrating section %d/%d: %s", section_index, total_sections, chunk.heading)
    raw_text = llm_fn([{"role": "user", "content": prompt}], call_type="dialogue")
    lines = _parse_narration(raw_text, section_index)

    if not lines:
        log.warning("No lines parsed for section %d, retrying", section_index)
        raw_text = llm_fn([{"role": "user", "content": prompt}], call_type="dialogue")
        lines = _parse_narration(raw_text, section_index)

    if not lines:
        # Caching an empty result would make the failure permanent: every
        # later run would hit the cache and never ask the LLM again.
        log.warning(
            "Narration for section %d produced no parseable lines; result not cached",
            section_index,
        )
        return lines

    for line in lines:
        line.text = sanitizer.apply(line.text)

    Path(cache_dir).mkdir(parents=True, exist_ok=True)
    cache_file.write_text(
        json.dumps(
            [
                {"speaker": ln.speaker, "text": ln.text, "unit_number": ln.unit_number}
                for ln in lines
            ]
        ),
        encoding="utf-8",
    )

    return lines
107
+
108
+
109
+ def _parse_narration(raw: str, unit_number: int) -> list[DialogueLine]:
110
+ lines: list[DialogueLine] = []
111
+ for raw_line in raw.split("\n"):
112
+ stripped = raw_line.strip()
113
+ if not stripped:
114
+ continue
115
+ match = re.match(r"^ALEX\s*[:\-]\s*(.+)", stripped, re.IGNORECASE)
116
+ if match:
117
+ lines.append(
118
+ DialogueLine(speaker="ALEX", text=match.group(1).strip(), unit_number=unit_number)
119
+ )
120
+ else:
121
+ log.debug("Skipping unparseable narration line: %s", stripped[:80])
122
+ return lines
@@ -0,0 +1,223 @@
1
+ """Parse and normalise raw LLM segment responses into SlideSegment lists."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+
7
+ from tutor.infra.llm import parse_json_response
8
+ from tutor.models import VALID_VISUAL_TYPES, DialogueLine, SlideSegment
9
+
10
+ log = logging.getLogger(__name__)
11
+
12
+
13
def parse_segments_response(
    raw: str,
    unit_index: int,
    lines: list[DialogueLine],
) -> list[SlideSegment]:
    """Parse an LLM JSON array into SlideSegment objects.

    Each dict item becomes one segment: an unknown or non-string
    ``visual_type`` becomes ``"key_insight"``, a reversed line range is
    swapped, and both ends of the range are clamped so they never point past
    the last dialogue line. Items that are not dicts, or whose line indices
    cannot be converted to integers, are skipped. The surviving segments are
    passed through ``fill_gaps``.

    Args:
        raw: Raw LLM response text expected to contain a JSON array.
        unit_index: Index of the unit the segments belong to.
        lines: Dialogue lines of the unit; only their count is used here.

    Returns:
        The gap-filled segment list, or ``fallback_segments(unit_index, lines)``
        when the response cannot be parsed, is not a list, or yields no
        usable segment.
    """
    try:
        data = parse_json_response(raw)
    except Exception:
        return fallback_segments(unit_index, lines)

    if not isinstance(data, list):
        return fallback_segments(unit_index, lines)

    n = len(lines)
    last_line = max(n - 1, 0)
    result: list[SlideSegment] = []

    for item in data:
        if not isinstance(item, dict):
            continue

        vtype = item.get("visual_type", "key_insight")
        # The isinstance guard keeps a list/dict value from reaching the
        # membership test or the str methods used for the default title.
        if not isinstance(vtype, str) or vtype not in VALID_VISUAL_TYPES:
            vtype = "key_insight"

        # The model may emit null or free text for the indices; skip such an
        # item instead of letting int() abort the whole parse.
        try:
            ls = int(item.get("lines_start", 0))
            le = int(item.get("lines_end", 0))
        except (TypeError, ValueError):
            log.warning(
                "segment has non-integer line range (%r, %r) — skipping",
                item.get("lines_start"),
                item.get("lines_end"),
            )
            continue

        if ls > le:
            ls, le = le, ls

        # Clamp the start to the last line as well, so a range lying wholly
        # past the end collapses onto the last line rather than start > end.
        ls = min(max(0, ls), last_line)
        le = min(n - 1, le) if n > 0 else 0

        title = item.get("title") or vtype.replace("_", " ").title()
        body = item.get("body") or None
        code = item.get("code") or None
        language = item.get("language") or None
        mermaid = item.get("mermaid") if vtype == "diagram" else None
        left = item.get("left") or None
        right = item.get("right") or None
        rows = item.get("rows") or None

        if rows is not None:
            # Only a list of lists is accepted as table rows.
            if not (isinstance(rows, list) and all(isinstance(r, list) for r in rows)):
                rows = None

        seg = SlideSegment(
            unit_index=unit_index,
            segment_index=0,
            lines_start=ls,
            lines_end=le,
            visual_type=vtype,
            title=title,
            body=body,
            code=code,
            language=language,
            mermaid=mermaid,
            left=left,
            right=right,
            rows=rows,
        )
        result.append(_validate_segment(seg))

    if not result:
        return fallback_segments(unit_index, lines)

    return fill_gaps(result, unit_index, n)
85
+
86
+
87
def fill_gaps(
    raw_segments: list[SlideSegment],
    unit_index: int,
    total_lines: int,
) -> list[SlideSegment]:
    """Turn *raw_segments* into a gap-free, non-overlapping sequence.

    Segments are processed in order of ``lines_start``. A segment that ends
    before the current position is dropped; one that starts before it is
    replaced by a copy starting at the current position; uncovered ranges
    (including the tail up to ``total_lines - 1``) get a key_insight gap
    segment. Finally ``segment_index`` is rewritten to the position in the
    returned list.

    When ``total_lines`` is 0 the input list is returned untouched.
    """
    if total_lines == 0:
        return raw_segments

    covered: list[SlideSegment] = []
    next_line = 0

    for segment in sorted(raw_segments, key=lambda s: s.lines_start):
        if segment.lines_end < next_line:
            # Entirely inside territory that is already covered.
            continue

        if segment.lines_start > next_line:
            # Hole between the previous segment and this one.
            covered.append(
                _make_gap_segment(unit_index, next_line, segment.lines_start - 1)
            )
            kept = segment
        elif segment.lines_start == next_line:
            kept = segment
        else:
            # Overlaps the previous segment: keep only the uncovered tail.
            kept = SlideSegment(
                unit_index=segment.unit_index,
                segment_index=segment.segment_index,
                lines_start=next_line,
                lines_end=segment.lines_end,
                visual_type=segment.visual_type,
                title=segment.title,
                body=segment.body,
                code=segment.code,
                language=segment.language,
                mermaid=segment.mermaid,
                left=segment.left,
                right=segment.right,
                rows=segment.rows,
            )

        covered.append(kept)
        next_line = kept.lines_end + 1

    if next_line < total_lines:
        covered.append(_make_gap_segment(unit_index, next_line, total_lines - 1))

    for position, segment in enumerate(covered):
        segment.segment_index = position

    return covered
141
+
142
+
143
def fallback_segments(
    unit_index: int,
    lines: list[DialogueLine],
) -> list[SlideSegment]:
    """Build a minimal segment list from the line count alone (no LLM).

    With no lines a single "Introduction" hook segment is returned. Otherwise
    the list opens with a hook_question segment, continues with key_insight
    segments of up to three lines, and ends with a memory_hook segment on the
    final line. For a single line the memory_hook segment covers line 0 again.
    The result is never empty.
    """
    line_count = len(lines)
    if not line_count:
        return [_make_segment(unit_index, 0, 0, 0, "hook_question", "Introduction")]

    final_line = line_count - 1
    hook_end = min(1, line_count - 2) if line_count > 2 else 0

    segments: list[SlideSegment] = [
        _make_segment(unit_index, 0, 0, hook_end, "hook_question", "Opening Question")
    ]
    position = hook_end + 1

    # The segment index always equals the number of segments built so far.
    while position < final_line:
        stop = min(position + 2, final_line - 1)
        segments.append(
            _make_segment(
                unit_index, len(segments), position, stop, "key_insight", "Key Insight"
            )
        )
        position = stop + 1

    if position <= final_line:
        segments.append(
            _make_segment(
                unit_index, len(segments), position, final_line, "memory_hook", "Remember This"
            )
        )
    elif len(segments) == 1:
        segments.append(
            _make_segment(unit_index, len(segments), 0, 0, "memory_hook", "Remember This")
        )

    return segments
172
+
173
+
174
+ # ── private helpers ───────────────────────────────────────────────────────────
175
+
176
+
177
+ def _validate_segment(seg: SlideSegment) -> SlideSegment:
178
+ """Post-process a segment: reclassify types that would produce blank slides."""
179
+ if seg.visual_type == "step_sequence" and not seg.body:
180
+ log.warning(
181
+ "segment %d-%d is step_sequence but body is empty — falling back to definition",
182
+ seg.lines_start,
183
+ seg.lines_end,
184
+ )
185
+ seg.visual_type = "definition"
186
+ seg.body = seg.title
187
+ if seg.visual_type == "callout" and not seg.body:
188
+ log.warning(
189
+ "segment %d-%d is callout but body is empty — falling back to key_insight",
190
+ seg.lines_start,
191
+ seg.lines_end,
192
+ )
193
+ seg.visual_type = "key_insight"
194
+ return seg
195
+
196
+
197
def _make_segment(
    unit_index: int,
    segment_index: int,
    lines_start: int,
    lines_end: int,
    visual_type: str,
    title: str,
) -> SlideSegment:
    """Create a SlideSegment that carries only position, type and title.

    Every content field (body, code, language, mermaid, left, right, rows) is
    set to ``None``.
    """
    blank_content = dict.fromkeys(
        ("body", "code", "language", "mermaid", "left", "right", "rows")
    )
    return SlideSegment(
        unit_index=unit_index,
        segment_index=segment_index,
        lines_start=lines_start,
        lines_end=lines_end,
        visual_type=visual_type,
        title=title,
        **blank_content,
    )
220
+
221
+
222
def _make_gap_segment(unit_index: int, start: int, end: int) -> SlideSegment:
    """Create the "Key Insight" filler segment for lines ``start``..``end``.

    The segment index is set to -1 as a placeholder.
    """
    return _make_segment(
        unit_index=unit_index,
        segment_index=-1,
        lines_start=start,
        lines_end=end,
        visual_type="key_insight",
        title="Key Insight",
    )