learnx-cli 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- learnx_cli-0.3.0.dist-info/METADATA +240 -0
- learnx_cli-0.3.0.dist-info/RECORD +131 -0
- learnx_cli-0.3.0.dist-info/WHEEL +4 -0
- learnx_cli-0.3.0.dist-info/entry_points.txt +2 -0
- tutor/.env copy.example +4 -0
- tutor/__init__.py +0 -0
- tutor/__main__.py +4 -0
- tutor/assets/__init__.py +5 -0
- tutor/assets/html/fonts/Inter-Bold.woff2 +0 -0
- tutor/assets/html/fonts/Inter-Regular.woff2 +0 -0
- tutor/assets/html/fonts/Inter-SemiBold.woff2 +0 -0
- tutor/assets/html/fonts/JetBrainsMono-Regular.woff2 +0 -0
- tutor/assets/html/highlight-java.min.js +2 -0
- tutor/assets/html/highlight-javascript.min.js +2 -0
- tutor/assets/html/highlight-python.min.js +2 -0
- tutor/assets/html/highlight.min.js +17 -0
- tutor/assets/html/mermaid.min.js +31 -0
- tutor/assets/html/slide_base.css +464 -0
- tutor/assets/html/theme-learnx-dark.css +12 -0
- tutor/audio/__init__.py +0 -0
- tutor/audio/audio_builder.py +143 -0
- tutor/audio/sanitizer.py +9 -0
- tutor/audio/tts_renderer.py +54 -0
- tutor/cli/__init__.py +0 -0
- tutor/cli/commands.py +391 -0
- tutor/cli/logo.py +21 -0
- tutor/cli/playback_commands.py +239 -0
- tutor/cli/shell.py +91 -0
- tutor/cli/shell_context.py +18 -0
- tutor/cli/theme.py +39 -0
- tutor/cli/video_commands.py +123 -0
- tutor/config.py +122 -0
- tutor/conftest.py +5 -0
- tutor/constants.py +82 -0
- tutor/exceptions.py +26 -0
- tutor/generation/__init__.py +0 -0
- tutor/generation/assembler.py +81 -0
- tutor/generation/curriculum.py +97 -0
- tutor/generation/dialogue.py +172 -0
- tutor/generation/narrator.py +122 -0
- tutor/generation/segment_parser.py +223 -0
- tutor/generation/segment_planner.py +200 -0
- tutor/generation/visual_planner.py +205 -0
- tutor/infra/__init__.py +0 -0
- tutor/infra/llm.py +152 -0
- tutor/ingestion/__init__.py +0 -0
- tutor/ingestion/chunker.py +171 -0
- tutor/ingestion/doc_analyzer.py +41 -0
- tutor/ingestion/parse_content.py +19 -0
- tutor/ingestion/summarizer.py +51 -0
- tutor/inspector.py +117 -0
- tutor/llm_config.toml +58 -0
- tutor/models.py +147 -0
- tutor/player/__init__.py +0 -0
- tutor/player/input_handler.py +45 -0
- tutor/player/player.py +308 -0
- tutor/player/player_display.py +117 -0
- tutor/prompts/curriculum.txt +67 -0
- tutor/prompts/dialogue.txt +62 -0
- tutor/prompts/narrate.txt +34 -0
- tutor/prompts/qa.txt +17 -0
- tutor/prompts/summarize.txt +9 -0
- tutor/prompts/visual.txt +60 -0
- tutor/prompts/visual_v3.txt +91 -0
- tutor/qa/__init__.py +0 -0
- tutor/qa/qa.py +105 -0
- tutor/requirements-dev.txt +2 -0
- tutor/requirements.txt +12 -0
- tutor/sample_docs/headingless_large.md +1 -0
- tutor/sample_docs/headingless_test.md +1 -0
- tutor/sample_docs/java-basics.md +78 -0
- tutor/tests/__init__.py +0 -0
- tutor/tests/audio/__init__.py +0 -0
- tutor/tests/audio/test_audio_builder.py +106 -0
- tutor/tests/audio/test_sanitizer.py +41 -0
- tutor/tests/cli/__init__.py +0 -0
- tutor/tests/cli/test_commands.py +67 -0
- tutor/tests/cli/test_video_commands.py +190 -0
- tutor/tests/e2e/README.md +61 -0
- tutor/tests/e2e/__init__.py +0 -0
- tutor/tests/e2e/conftest.py +117 -0
- tutor/tests/e2e/fixtures/README.md +17 -0
- tutor/tests/e2e/fixtures/sample.md +13 -0
- tutor/tests/e2e/test_audio_quality.py +40 -0
- tutor/tests/e2e/test_av_sync.py +56 -0
- tutor/tests/e2e/test_pipeline_smoke.py +37 -0
- tutor/tests/e2e/test_slide_render.py +72 -0
- tutor/tests/e2e/test_video_streams.py +104 -0
- tutor/tests/generation/__init__.py +0 -0
- tutor/tests/generation/conftest.py +134 -0
- tutor/tests/generation/test_assembler.py +64 -0
- tutor/tests/generation/test_curriculum.py +107 -0
- tutor/tests/generation/test_narrator.py +165 -0
- tutor/tests/generation/test_segment_edge_cases.py +280 -0
- tutor/tests/generation/test_segment_planner.py +324 -0
- tutor/tests/generation/test_visual_planner.py +319 -0
- tutor/tests/ingestion/__init__.py +0 -0
- tutor/tests/ingestion/test_chunker.py +94 -0
- tutor/tests/ingestion/test_doc_analyzer.py +51 -0
- tutor/tests/player/__init__.py +0 -0
- tutor/tests/player/test_player_states.py +88 -0
- tutor/tests/test_assets.py +39 -0
- tutor/tests/test_models_visual.py +180 -0
- tutor/tests/visual/__init__.py +0 -0
- tutor/tests/visual/test_beat_timer.py +321 -0
- tutor/tests/visual/test_pipeline_integration.py +178 -0
- tutor/tests/visual/test_slide_renderer.py +298 -0
- tutor/tests/visual/test_subtitle_writer.py +165 -0
- tutor/tests/visual/test_video_assembler.py +108 -0
- tutor/tests/visual/test_visual_pipeline.py +270 -0
- tutor/tutor.py +365 -0
- tutor/visual/__init__.py +213 -0
- tutor/visual/beat_timer.py +222 -0
- tutor/visual/slide_renderer.py +236 -0
- tutor/visual/subtitle_writer.py +187 -0
- tutor/visual/templates/_base.html.j2 +40 -0
- tutor/visual/templates/analogy.html.j2 +21 -0
- tutor/visual/templates/callout.html.j2 +10 -0
- tutor/visual/templates/code_example.html.j2 +12 -0
- tutor/visual/templates/comparison.html.j2 +28 -0
- tutor/visual/templates/decision_guide.html.j2 +37 -0
- tutor/visual/templates/definition.html.j2 +13 -0
- tutor/visual/templates/diagram.html.j2 +11 -0
- tutor/visual/templates/hook_question.html.j2 +17 -0
- tutor/visual/templates/key_insight.html.j2 +9 -0
- tutor/visual/templates/memory_hook.html.j2 +7 -0
- tutor/visual/templates/outro.html.j2 +16 -0
- tutor/visual/templates/question_prompt.html.j2 +13 -0
- tutor/visual/templates/step_sequence.html.j2 +14 -0
- tutor/visual/templates/title_card.html.j2 +12 -0
- tutor/visual/video_assembler.py +299 -0
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import re
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from tutor.constants import PROMPT_VERSION, SUMMARY_CACHE_DIR
|
|
8
|
+
from tutor.exceptions import LLMError
|
|
9
|
+
from tutor.infra.llm import LIMITS, LLMFn, load_prompt
|
|
10
|
+
from tutor.models import Chunk, DialogueLine, TeachingUnit
|
|
11
|
+
|
|
12
|
+
log = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def generate(
    unit: TeachingUnit,
    source_chunks: list[Chunk],
    fmt: str,
    llm_fn: LLMFn,
    difficulty: str = "beginner",
    cache_dir: str = SUMMARY_CACHE_DIR,
) -> list[DialogueLine]:
    """Generate (or load from cache) the dialogue lines for one teaching unit.

    Args:
        unit: Teaching unit to turn into dialogue.
        source_chunks: Candidate chunks. Those whose ``chunk_id`` appears in
            ``unit.source_sections`` are sent as source material; when none
            match, the first two chunks are used instead.
        fmt: Dialogue format. ``"dual-tutor"`` restricts speakers to ALEX and
            SAM; any other value restricts them to ALEX and MAYA.
        llm_fn: Called as ``llm_fn(messages, call_type="dialogue")``; its
            return value is parsed as raw dialogue text.
        difficulty: Only contributes to the cache key in this function; it is
            not inserted into the prompt here.
        cache_dir: Directory holding the ``<md5>.dialogue.json`` cache files.

    Returns:
        The parsed dialogue lines after speaker normalisation.

    Raises:
        LLMError: If two attempts both yield fewer than four parseable lines,
            or if the speakers do not fit ``fmt``.
    """
    # NOTE(review): the key covers concept, budget, format, difficulty and
    # prompt version only — not the unit number or the source text. Confirm
    # that sharing a cache entry across those is intended.
    cache_key = hashlib.md5(
        (unit.concept + str(unit.word_budget) + fmt + difficulty + PROMPT_VERSION).encode()
    ).hexdigest()
    cache_file = Path(cache_dir) / f"{cache_key}.dialogue.json"

    if cache_file.exists():
        try:
            cached = [
                DialogueLine(**entry)
                for entry in json.loads(cache_file.read_text(encoding="utf-8"))
            ]
        except (ValueError, TypeError) as exc:
            # A truncated or hand-edited cache file must not break generation
            # permanently: fall through and regenerate (the file is rewritten).
            log.warning("Ignoring unreadable dialogue cache %s: %s", cache_file, exc)
        else:
            log.debug("Cache hit for dialogue unit %d (%s)", unit.unit, unit.concept)
            return cached

    relevant = [c for c in source_chunks if c.chunk_id in unit.source_sections]
    if not relevant:
        relevant = source_chunks[:2]
    source_text = "\n\n".join(f"## {c.heading}\n{c.text}" for c in relevant)
    source_text = _truncate_source(source_text, LIMITS["max_source_tokens"])

    unit_json = json.dumps(
        {
            "concept": unit.concept,
            "complexity": unit.complexity,
            "word_budget": unit.word_budget,
            "key_facts": unit.key_facts,
            "common_misconception": unit.common_misconception,
            "good_analogy": unit.good_analogy,
            "js_contrast": unit.js_contrast,
            "question_style": unit.question_style,
            "memory_hook": unit.memory_hook,
            "prerequisite_concepts": unit.prerequisite_concepts,
            "production_relevance": unit.production_relevance,
        },
        indent=2,
    )

    speaker_constraint = (
        "IMPORTANT: Only use ALEX and SAM speakers. Do NOT use MAYA."
        if fmt == "dual-tutor"
        else "IMPORTANT: Only use ALEX and MAYA speakers. Do NOT use SAM."
    )
    system_prompt = (
        load_prompt("dialogue.txt").format(
            format=fmt,
            word_budget=unit.word_budget,
        )
        + f"\n\n{speaker_constraint}"
    )

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Unit:\n{unit_json}\n\nSource:\n{source_text}"},
    ]

    log.info("Generating dialogue for unit %d: %s", unit.unit, unit.concept)
    raw = llm_fn(messages, call_type="dialogue")
    lines = _parse_dialogue(raw, unit.unit)

    # One retry with the same messages before giving up.
    if len(lines) < 4:
        log.warning("Only %d lines parsed, retrying dialogue generation", len(lines))
        raw = llm_fn(messages, call_type="dialogue")
        lines = _parse_dialogue(raw, unit.unit)
        if len(lines) < 4:
            raise LLMError(
                f"Dialogue generation returned fewer than 4 lines for unit {unit.unit}: {unit.concept}"
            )

    lines = _normalize_speakers(lines, fmt)
    _validate_speakers(lines, fmt)

    # Only validated dialogue is written to the cache.
    Path(cache_dir).mkdir(parents=True, exist_ok=True)
    cache_file.write_text(
        json.dumps(
            [
                {"speaker": ln.speaker, "text": ln.text, "unit_number": ln.unit_number}
                for ln in lines
            ]
        ),
        encoding="utf-8",
    )

    return lines
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _truncate_source(text: str, max_tokens: int) -> str:
|
|
105
|
+
words = text.split()
|
|
106
|
+
max_words = int(max_tokens / 1.3)
|
|
107
|
+
if len(words) <= max_words:
|
|
108
|
+
return text
|
|
109
|
+
log.warning(
|
|
110
|
+
"Source text truncated from %d to %d words for context limit", len(words), max_words
|
|
111
|
+
)
|
|
112
|
+
return " ".join(words[:max_words])
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _parse_dialogue_line(raw_line: str, unit_number: int) -> DialogueLine | None:
|
|
116
|
+
match = re.match(r"^(ALEX|MAYA|SAM)\s*[:\-]\s*(.+)", raw_line.strip(), re.IGNORECASE)
|
|
117
|
+
if not match:
|
|
118
|
+
return None
|
|
119
|
+
return DialogueLine(
|
|
120
|
+
speaker=match.group(1).upper(),
|
|
121
|
+
text=match.group(2).strip(),
|
|
122
|
+
unit_number=unit_number,
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _normalize_speakers(lines: list[DialogueLine], fmt: str) -> list[DialogueLine]:
|
|
127
|
+
"""Remap speakers so the output matches the requested format."""
|
|
128
|
+
if fmt == "dual-tutor":
|
|
129
|
+
return [
|
|
130
|
+
DialogueLine(
|
|
131
|
+
speaker="SAM" if ln.speaker == "MAYA" else ln.speaker,
|
|
132
|
+
text=ln.text,
|
|
133
|
+
unit_number=ln.unit_number,
|
|
134
|
+
)
|
|
135
|
+
for ln in lines
|
|
136
|
+
]
|
|
137
|
+
return [
|
|
138
|
+
DialogueLine(
|
|
139
|
+
speaker="MAYA" if ln.speaker == "SAM" else ln.speaker,
|
|
140
|
+
text=ln.text,
|
|
141
|
+
unit_number=ln.unit_number,
|
|
142
|
+
)
|
|
143
|
+
for ln in lines
|
|
144
|
+
]
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _validate_speakers(lines: list[DialogueLine], fmt: str) -> None:
|
|
148
|
+
speakers = {line.speaker for line in lines}
|
|
149
|
+
if fmt == "tutor-student":
|
|
150
|
+
if "ALEX" not in speakers:
|
|
151
|
+
raise LLMError("tutor-student dialogue missing ALEX lines")
|
|
152
|
+
if "SAM" in speakers:
|
|
153
|
+
raise LLMError("tutor-student dialogue contains SAM — wrong format")
|
|
154
|
+
elif fmt == "dual-tutor":
|
|
155
|
+
if "MAYA" in speakers:
|
|
156
|
+
raise LLMError("dual-tutor dialogue contains MAYA — wrong format")
|
|
157
|
+
expected = {"ALEX", "SAM"}
|
|
158
|
+
if not expected.issubset(speakers):
|
|
159
|
+
raise LLMError(f"dual-tutor dialogue missing speakers: {expected - speakers}")
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _parse_dialogue(raw: str, unit_number: int) -> list[DialogueLine]:
|
|
163
|
+
lines: list[DialogueLine] = []
|
|
164
|
+
for raw_line in raw.split("\n"):
|
|
165
|
+
if not raw_line.strip():
|
|
166
|
+
continue
|
|
167
|
+
parsed = _parse_dialogue_line(raw_line, unit_number)
|
|
168
|
+
if parsed:
|
|
169
|
+
lines.append(parsed)
|
|
170
|
+
else:
|
|
171
|
+
log.debug("Skipping unparseable line: %s", raw_line[:80])
|
|
172
|
+
return lines
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
import re
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from tutor.audio import sanitizer
|
|
10
|
+
from tutor.constants import SUMMARY_CACHE_DIR
|
|
11
|
+
from tutor.infra.llm import LLMFn, load_prompt
|
|
12
|
+
from tutor.models import Chunk, DialogueLine, TeachingUnit
|
|
13
|
+
|
|
14
|
+
log = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
# Version tag mixed into every narration cache key; changing it changes the
# keys, so previously cached narrations are no longer matched.
NARRATE_VERSION = "narrate_v1"
|
|
17
|
+
# Multiplier applied to a chunk's source word count to derive its word budget
# (the result is floored at 100 words where it is used).
_WORDS_PER_SOURCE_WORD = 1.25
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def narrate_all(
    chunks: list[Chunk],
    doc_title: str,
    llm_fn: LLMFn,
    cache_dir: str = SUMMARY_CACHE_DIR,
) -> tuple[list[TeachingUnit], list[list[DialogueLine]]]:
    """Narrate each chunk in order and pair it with a synthetic teaching unit.

    Sections are numbered from 1. Returns ``(units, all_lines)`` where both
    lists have one entry per chunk, in document order.
    """
    section_count = len(chunks)
    units: list[TeachingUnit] = []
    all_lines: list[list[DialogueLine]] = []

    for position, chunk in enumerate(chunks, start=1):
        narration = _narrate_chunk(chunk, position, section_count, doc_title, llm_fn, cache_dir)
        units.append(_chunk_to_unit(chunk, position))
        all_lines.append(narration)

    return units, all_lines
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _chunk_to_unit(chunk: Chunk, unit_index: int) -> TeachingUnit:
    """Wrap a chunk in a minimal TeachingUnit.

    The concept is the chunk heading (or ``"Section <n>"`` when the heading is
    empty) and the word budget is the scaled source word count, never below
    100. The remaining descriptive fields are left empty.
    """
    source_word_count = len(chunk.text.split())
    scaled_budget = int(source_word_count * _WORDS_PER_SOURCE_WORD)
    concept = chunk.heading if chunk.heading else f"Section {unit_index}"

    return TeachingUnit(
        unit=unit_index,
        concept=concept,
        source_sections=[chunk.chunk_id],
        complexity=1,
        word_budget=scaled_budget if scaled_budget > 100 else 100,
        key_facts=[],
        common_misconception="",
        good_analogy="",
        question_style="recall",
        memory_hook="",
    )
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _narrate_chunk(
    chunk: Chunk,
    section_index: int,
    total_sections: int,
    doc_title: str,
    llm_fn: LLMFn,
    cache_dir: str,
) -> list[DialogueLine]:
    """Narrate one chunk as ALEX lines, using an on-disk cache.

    Args:
        chunk: Section to narrate; its id and text (plus ``NARRATE_VERSION``)
            form the cache key.
        section_index: Position of the section, also stored as the
            ``unit_number`` of every returned line.
        total_sections: Total number of sections, used in the prompt and logs.
        doc_title: Document title inserted into the prompt.
        llm_fn: Called as ``llm_fn(messages, call_type="dialogue")``.
        cache_dir: Directory holding the ``<md5>.narrate.json`` cache files.

    Returns:
        The sanitised narration lines. The list is empty when both the first
        attempt and the single retry produced no parseable line; such a
        result is returned but not cached.
    """
    cache_key = hashlib.md5((chunk.chunk_id + chunk.text + NARRATE_VERSION).encode()).hexdigest()
    cache_file = Path(cache_dir) / f"{cache_key}.narrate.json"

    if cache_file.exists():
        try:
            cached = [
                DialogueLine(**entry)
                for entry in json.loads(cache_file.read_text(encoding="utf-8"))
            ]
        except (ValueError, TypeError) as exc:
            log.warning("Ignoring unreadable narration cache %s: %s", cache_file, exc)
            cached = []
        # An empty entry is treated as a miss so that a failed earlier run
        # does not leave the section without narration forever.
        if cached:
            log.debug(
                "Cache hit for narration %d/%d (%s)", section_index, total_sections, chunk.heading
            )
            return cached

    word_budget = max(100, int(len(chunk.text.split()) * _WORDS_PER_SOURCE_WORD))
    prompt = load_prompt("narrate.txt").format(
        doc_title=doc_title,
        section_index=section_index,
        total_sections=total_sections,
        heading=chunk.heading,
        word_budget=word_budget,
        section_text=chunk.text,
    )

    log.info("Narrating section %d/%d: %s", section_index, total_sections, chunk.heading)
    raw_text = llm_fn([{"role": "user", "content": prompt}], call_type="dialogue")
    lines = _parse_narration(raw_text, section_index)

    if not lines:
        log.warning("No lines parsed for section %d, retrying", section_index)
        raw_text = llm_fn([{"role": "user", "content": prompt}], call_type="dialogue")
        lines = _parse_narration(raw_text, section_index)

    if not lines:
        # Do not persist a failed generation: caching [] would make every
        # later run hit the cache and skip the LLM for this section.
        log.warning(
            "No lines parsed for section %d after retry; result not cached", section_index
        )
        return lines

    for line in lines:
        line.text = sanitizer.apply(line.text)

    Path(cache_dir).mkdir(parents=True, exist_ok=True)
    cache_file.write_text(
        json.dumps(
            [
                {"speaker": ln.speaker, "text": ln.text, "unit_number": ln.unit_number}
                for ln in lines
            ]
        ),
        encoding="utf-8",
    )

    return lines
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _parse_narration(raw: str, unit_number: int) -> list[DialogueLine]:
|
|
110
|
+
lines: list[DialogueLine] = []
|
|
111
|
+
for raw_line in raw.split("\n"):
|
|
112
|
+
stripped = raw_line.strip()
|
|
113
|
+
if not stripped:
|
|
114
|
+
continue
|
|
115
|
+
match = re.match(r"^ALEX\s*[:\-]\s*(.+)", stripped, re.IGNORECASE)
|
|
116
|
+
if match:
|
|
117
|
+
lines.append(
|
|
118
|
+
DialogueLine(speaker="ALEX", text=match.group(1).strip(), unit_number=unit_number)
|
|
119
|
+
)
|
|
120
|
+
else:
|
|
121
|
+
log.debug("Skipping unparseable narration line: %s", stripped[:80])
|
|
122
|
+
return lines
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
"""Parse and normalise raw LLM segment responses into SlideSegment lists."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
from tutor.infra.llm import parse_json_response
|
|
8
|
+
from tutor.models import VALID_VISUAL_TYPES, DialogueLine, SlideSegment
|
|
9
|
+
|
|
10
|
+
log = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def parse_segments_response(
    raw: str,
    unit_index: int,
    lines: list[DialogueLine],
) -> list[SlideSegment]:
    """Parse an LLM JSON array into SlideSegment objects.

    Each dict item becomes one segment: unknown visual types are replaced by
    ``key_insight``, reversed line ranges are swapped, and ranges are clamped
    to the available dialogue lines. Items whose line numbers are not
    integers, or whose range lies entirely outside the dialogue, are skipped
    so that they can be covered by gap segments instead.

    Args:
        raw: Raw LLM response text.
        unit_index: Index of the unit the segments belong to.
        lines: Dialogue lines of that unit; only their count is used here.

    Returns:
        Gap-filled segments, or ``fallback_segments(unit_index, lines)`` when
        the response is not a JSON list or yields no usable item.
    """
    try:
        data = parse_json_response(raw)
    except Exception:
        # Boundary with untrusted LLM output: whatever the parser raises, the
        # documented behaviour is to fall back rather than propagate.
        log.warning("unit %d: segment response could not be parsed, using fallback", unit_index)
        return fallback_segments(unit_index, lines)

    if not isinstance(data, list):
        return fallback_segments(unit_index, lines)

    n = len(lines)
    last_line = n - 1
    result: list[SlideSegment] = []

    for item in data:
        if not isinstance(item, dict):
            continue

        vtype = item.get("visual_type", "key_insight")
        # The isinstance guard keeps unhashable values (e.g. a list) from
        # raising in the membership test.
        if not isinstance(vtype, str) or vtype not in VALID_VISUAL_TYPES:
            vtype = "key_insight"

        try:
            ls = int(item.get("lines_start", 0))
            le = int(item.get("lines_end", 0))
        except (TypeError, ValueError, OverflowError):
            # e.g. null, "abc" or Infinity — skip this item only.
            log.warning("unit %d: skipping segment with non-integer line range", unit_index)
            continue

        if ls > le:
            ls, le = le, ls

        if n == 0:
            # No dialogue lines to index; use the same 0-0 range as the fallback.
            ls = le = 0
        elif le < 0 or ls > last_line:
            # Clamping a range that misses the dialogue entirely would give
            # lines_start > lines_end, so drop it instead.
            log.warning(
                "unit %d: skipping segment %d-%d outside lines 0-%d",
                unit_index,
                ls,
                le,
                last_line,
            )
            continue
        else:
            ls = max(0, ls)
            le = min(last_line, le)

        title = item.get("title") or vtype.replace("_", " ").title()
        body = item.get("body") or None
        code = item.get("code") or None
        language = item.get("language") or None
        mermaid = item.get("mermaid") if vtype == "diagram" else None
        left = item.get("left") or None
        right = item.get("right") or None
        rows = item.get("rows") or None

        # rows must be a list of lists; anything else is discarded.
        if rows is not None:
            if not (isinstance(rows, list) and all(isinstance(r, list) for r in rows)):
                rows = None

        seg = SlideSegment(
            unit_index=unit_index,
            segment_index=0,
            lines_start=ls,
            lines_end=le,
            visual_type=vtype,
            title=title,
            body=body,
            code=code,
            language=language,
            mermaid=mermaid,
            left=left,
            right=right,
            rows=rows,
        )
        result.append(_validate_segment(seg))

    if not result:
        return fallback_segments(unit_index, lines)

    return fill_gaps(result, unit_index, n)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def fill_gaps(
    raw_segments: list[SlideSegment],
    unit_index: int,
    total_lines: int,
) -> list[SlideSegment]:
    """Ensure every line 0..total_lines-1 is covered by exactly one segment.

    Segments are processed in order of ``lines_start``. Overlaps are resolved
    in favour of the earlier segment (the later one is trimmed or dropped),
    ranges are clipped to the last line, and segments that cannot cover any
    remaining line (inverted, or starting past the end) are dropped.
    Uncovered ranges receive ``key_insight`` gap segments.

    Args:
        raw_segments: Segments to arrange. Segments kept unchanged are reused
            and have their ``segment_index`` reassigned in place.
        unit_index: Unit index given to inserted gap segments.
        total_lines: Number of dialogue lines to cover.

    Returns:
        The ordered, renumbered segments; ``raw_segments`` itself, untouched,
        when ``total_lines`` is 0.
    """
    if total_lines == 0:
        return raw_segments

    last_line = total_lines - 1
    result: list[SlideSegment] = []
    cursor = 0  # first line not yet covered

    for seg in sorted(raw_segments, key=lambda s: s.lines_start):
        start = max(seg.lines_start, cursor)
        end = min(seg.lines_end, last_line)
        if start > end:
            # Nothing left for this segment: already covered by an earlier
            # one, inverted, or beyond the final line.
            continue

        if start > cursor:
            result.append(_make_gap_segment(unit_index, cursor, start - 1))

        if start == seg.lines_start and end == seg.lines_end:
            adjusted = seg
        else:
            # Copy with the trimmed range, leaving the caller's object as is.
            adjusted = SlideSegment(
                unit_index=seg.unit_index,
                segment_index=seg.segment_index,
                lines_start=start,
                lines_end=end,
                visual_type=seg.visual_type,
                title=seg.title,
                body=seg.body,
                code=seg.code,
                language=seg.language,
                mermaid=seg.mermaid,
                left=seg.left,
                right=seg.right,
                rows=seg.rows,
            )
        result.append(adjusted)
        cursor = end + 1

    if cursor < total_lines:
        result.append(_make_gap_segment(unit_index, cursor, last_line))

    for i, seg in enumerate(result):
        seg.segment_index = i

    return result
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def fallback_segments(
    unit_index: int,
    lines: list[DialogueLine],
) -> list[SlideSegment]:
    """Build a default hook / key-insight / memory-hook layout without the LLM.

    Only the number of lines matters. With no lines a single "Introduction"
    segment is returned; otherwise the result opens with a hook_question,
    continues with key_insight segments of up to three lines each, and ends
    with a memory_hook. The list is never empty.
    """
    total = len(lines)
    if total == 0:
        return [_make_segment(unit_index, 0, 0, 0, "hook_question", "Introduction")]

    last = total - 1
    # The hook spans two lines once there are at least three lines overall.
    hook_end = 1 if total > 2 else 0
    out: list[SlideSegment] = [
        _make_segment(unit_index, 0, 0, hook_end, "hook_question", "Opening Question")
    ]

    pos = hook_end + 1
    while pos < last:
        # Leave the final line free for the closing memory hook.
        stop = min(pos + 2, last - 1)
        out.append(_make_segment(unit_index, len(out), pos, stop, "key_insight", "Key Insight"))
        pos = stop + 1

    if pos <= last:
        out.append(_make_segment(unit_index, len(out), pos, last, "memory_hook", "Remember This"))
    elif len(out) == 1:
        # Single-line dialogue: the memory hook shares line 0 with the hook.
        out.append(_make_segment(unit_index, len(out), 0, 0, "memory_hook", "Remember This"))

    return out
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
# ── private helpers ───────────────────────────────────────────────────────────
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _validate_segment(seg: SlideSegment) -> SlideSegment:
|
|
178
|
+
"""Post-process a segment: reclassify types that would produce blank slides."""
|
|
179
|
+
if seg.visual_type == "step_sequence" and not seg.body:
|
|
180
|
+
log.warning(
|
|
181
|
+
"segment %d-%d is step_sequence but body is empty — falling back to definition",
|
|
182
|
+
seg.lines_start,
|
|
183
|
+
seg.lines_end,
|
|
184
|
+
)
|
|
185
|
+
seg.visual_type = "definition"
|
|
186
|
+
seg.body = seg.title
|
|
187
|
+
if seg.visual_type == "callout" and not seg.body:
|
|
188
|
+
log.warning(
|
|
189
|
+
"segment %d-%d is callout but body is empty — falling back to key_insight",
|
|
190
|
+
seg.lines_start,
|
|
191
|
+
seg.lines_end,
|
|
192
|
+
)
|
|
193
|
+
seg.visual_type = "key_insight"
|
|
194
|
+
return seg
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _make_segment(
    unit_index: int,
    segment_index: int,
    lines_start: int,
    lines_end: int,
    visual_type: str,
    title: str,
) -> SlideSegment:
    """Build a SlideSegment that carries only a title; every content field is None."""
    empty_content = dict.fromkeys(
        ("body", "code", "language", "mermaid", "left", "right", "rows")
    )
    return SlideSegment(
        unit_index=unit_index,
        segment_index=segment_index,
        lines_start=lines_start,
        lines_end=lines_end,
        visual_type=visual_type,
        title=title,
        **empty_content,
    )
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def _make_gap_segment(unit_index: int, start: int, end: int) -> SlideSegment:
    """Create a "Key Insight" key_insight segment for lines *start*..*end*, with segment index -1."""
    return _make_segment(
        unit_index=unit_index,
        segment_index=-1,
        lines_start=start,
        lines_end=end,
        visual_type="key_insight",
        title="Key Insight",
    )
|