learnx-cli 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. learnx_cli-0.3.0.dist-info/METADATA +240 -0
  2. learnx_cli-0.3.0.dist-info/RECORD +131 -0
  3. learnx_cli-0.3.0.dist-info/WHEEL +4 -0
  4. learnx_cli-0.3.0.dist-info/entry_points.txt +2 -0
  5. tutor/.env copy.example +4 -0
  6. tutor/__init__.py +0 -0
  7. tutor/__main__.py +4 -0
  8. tutor/assets/__init__.py +5 -0
  9. tutor/assets/html/fonts/Inter-Bold.woff2 +0 -0
  10. tutor/assets/html/fonts/Inter-Regular.woff2 +0 -0
  11. tutor/assets/html/fonts/Inter-SemiBold.woff2 +0 -0
  12. tutor/assets/html/fonts/JetBrainsMono-Regular.woff2 +0 -0
  13. tutor/assets/html/highlight-java.min.js +2 -0
  14. tutor/assets/html/highlight-javascript.min.js +2 -0
  15. tutor/assets/html/highlight-python.min.js +2 -0
  16. tutor/assets/html/highlight.min.js +17 -0
  17. tutor/assets/html/mermaid.min.js +31 -0
  18. tutor/assets/html/slide_base.css +464 -0
  19. tutor/assets/html/theme-learnx-dark.css +12 -0
  20. tutor/audio/__init__.py +0 -0
  21. tutor/audio/audio_builder.py +143 -0
  22. tutor/audio/sanitizer.py +9 -0
  23. tutor/audio/tts_renderer.py +54 -0
  24. tutor/cli/__init__.py +0 -0
  25. tutor/cli/commands.py +391 -0
  26. tutor/cli/logo.py +21 -0
  27. tutor/cli/playback_commands.py +239 -0
  28. tutor/cli/shell.py +91 -0
  29. tutor/cli/shell_context.py +18 -0
  30. tutor/cli/theme.py +39 -0
  31. tutor/cli/video_commands.py +123 -0
  32. tutor/config.py +122 -0
  33. tutor/conftest.py +5 -0
  34. tutor/constants.py +82 -0
  35. tutor/exceptions.py +26 -0
  36. tutor/generation/__init__.py +0 -0
  37. tutor/generation/assembler.py +81 -0
  38. tutor/generation/curriculum.py +97 -0
  39. tutor/generation/dialogue.py +172 -0
  40. tutor/generation/narrator.py +122 -0
  41. tutor/generation/segment_parser.py +223 -0
  42. tutor/generation/segment_planner.py +200 -0
  43. tutor/generation/visual_planner.py +205 -0
  44. tutor/infra/__init__.py +0 -0
  45. tutor/infra/llm.py +152 -0
  46. tutor/ingestion/__init__.py +0 -0
  47. tutor/ingestion/chunker.py +171 -0
  48. tutor/ingestion/doc_analyzer.py +41 -0
  49. tutor/ingestion/parse_content.py +19 -0
  50. tutor/ingestion/summarizer.py +51 -0
  51. tutor/inspector.py +117 -0
  52. tutor/llm_config.toml +58 -0
  53. tutor/models.py +147 -0
  54. tutor/player/__init__.py +0 -0
  55. tutor/player/input_handler.py +45 -0
  56. tutor/player/player.py +308 -0
  57. tutor/player/player_display.py +117 -0
  58. tutor/prompts/curriculum.txt +67 -0
  59. tutor/prompts/dialogue.txt +62 -0
  60. tutor/prompts/narrate.txt +34 -0
  61. tutor/prompts/qa.txt +17 -0
  62. tutor/prompts/summarize.txt +9 -0
  63. tutor/prompts/visual.txt +60 -0
  64. tutor/prompts/visual_v3.txt +91 -0
  65. tutor/qa/__init__.py +0 -0
  66. tutor/qa/qa.py +105 -0
  67. tutor/requirements-dev.txt +2 -0
  68. tutor/requirements.txt +12 -0
  69. tutor/sample_docs/headingless_large.md +1 -0
  70. tutor/sample_docs/headingless_test.md +1 -0
  71. tutor/sample_docs/java-basics.md +78 -0
  72. tutor/tests/__init__.py +0 -0
  73. tutor/tests/audio/__init__.py +0 -0
  74. tutor/tests/audio/test_audio_builder.py +106 -0
  75. tutor/tests/audio/test_sanitizer.py +41 -0
  76. tutor/tests/cli/__init__.py +0 -0
  77. tutor/tests/cli/test_commands.py +67 -0
  78. tutor/tests/cli/test_video_commands.py +190 -0
  79. tutor/tests/e2e/README.md +61 -0
  80. tutor/tests/e2e/__init__.py +0 -0
  81. tutor/tests/e2e/conftest.py +117 -0
  82. tutor/tests/e2e/fixtures/README.md +17 -0
  83. tutor/tests/e2e/fixtures/sample.md +13 -0
  84. tutor/tests/e2e/test_audio_quality.py +40 -0
  85. tutor/tests/e2e/test_av_sync.py +56 -0
  86. tutor/tests/e2e/test_pipeline_smoke.py +37 -0
  87. tutor/tests/e2e/test_slide_render.py +72 -0
  88. tutor/tests/e2e/test_video_streams.py +104 -0
  89. tutor/tests/generation/__init__.py +0 -0
  90. tutor/tests/generation/conftest.py +134 -0
  91. tutor/tests/generation/test_assembler.py +64 -0
  92. tutor/tests/generation/test_curriculum.py +107 -0
  93. tutor/tests/generation/test_narrator.py +165 -0
  94. tutor/tests/generation/test_segment_edge_cases.py +280 -0
  95. tutor/tests/generation/test_segment_planner.py +324 -0
  96. tutor/tests/generation/test_visual_planner.py +319 -0
  97. tutor/tests/ingestion/__init__.py +0 -0
  98. tutor/tests/ingestion/test_chunker.py +94 -0
  99. tutor/tests/ingestion/test_doc_analyzer.py +51 -0
  100. tutor/tests/player/__init__.py +0 -0
  101. tutor/tests/player/test_player_states.py +88 -0
  102. tutor/tests/test_assets.py +39 -0
  103. tutor/tests/test_models_visual.py +180 -0
  104. tutor/tests/visual/__init__.py +0 -0
  105. tutor/tests/visual/test_beat_timer.py +321 -0
  106. tutor/tests/visual/test_pipeline_integration.py +178 -0
  107. tutor/tests/visual/test_slide_renderer.py +298 -0
  108. tutor/tests/visual/test_subtitle_writer.py +165 -0
  109. tutor/tests/visual/test_video_assembler.py +108 -0
  110. tutor/tests/visual/test_visual_pipeline.py +270 -0
  111. tutor/tutor.py +365 -0
  112. tutor/visual/__init__.py +213 -0
  113. tutor/visual/beat_timer.py +222 -0
  114. tutor/visual/slide_renderer.py +236 -0
  115. tutor/visual/subtitle_writer.py +187 -0
  116. tutor/visual/templates/_base.html.j2 +40 -0
  117. tutor/visual/templates/analogy.html.j2 +21 -0
  118. tutor/visual/templates/callout.html.j2 +10 -0
  119. tutor/visual/templates/code_example.html.j2 +12 -0
  120. tutor/visual/templates/comparison.html.j2 +28 -0
  121. tutor/visual/templates/decision_guide.html.j2 +37 -0
  122. tutor/visual/templates/definition.html.j2 +13 -0
  123. tutor/visual/templates/diagram.html.j2 +11 -0
  124. tutor/visual/templates/hook_question.html.j2 +17 -0
  125. tutor/visual/templates/key_insight.html.j2 +9 -0
  126. tutor/visual/templates/memory_hook.html.j2 +7 -0
  127. tutor/visual/templates/outro.html.j2 +16 -0
  128. tutor/visual/templates/question_prompt.html.j2 +13 -0
  129. tutor/visual/templates/step_sequence.html.j2 +14 -0
  130. tutor/visual/templates/title_card.html.j2 +12 -0
  131. tutor/visual/video_assembler.py +299 -0
@@ -0,0 +1,270 @@
1
+ """
2
+ Tests for tutor/visual/__init__.py helpers:
3
+ _doc_title_from_units, _load_all_lines, _mp3_duration, _format_duration
4
+ """
5
+
6
+ import json
7
+ import subprocess
8
+
9
+ import pytest
10
+
11
+ from tutor.models import DialogueLine
12
+ from tutor.visual import _doc_title_from_units, _format_duration, _load_all_lines, _mp3_duration
13
+
14
+ # ── _doc_title_from_units ────────────────────────────────────────────────────
15
+
16
+
17
def test_doc_title_returns_first_concept(tmp_path):
    """The title is the ``concept`` of the first unit in the JSON file."""
    payload = [{"concept": "Interfaces"}, {"concept": "Abstract Classes"}]
    units_json = tmp_path / "tutorial.units.json"
    units_json.write_text(json.dumps(payload), encoding="utf-8")

    title = _doc_title_from_units(units_json)

    assert title == "Interfaces"
24
+
25
+
26
def test_doc_title_fallback_when_empty_list(tmp_path):
    """An empty units list produces the default title ``Tutorial``."""
    empty_units = tmp_path / "tutorial.units.json"
    empty_units.write_text("[]", encoding="utf-8")

    title = _doc_title_from_units(empty_units)

    assert title == "Tutorial"
30
+
31
+
32
def test_doc_title_fallback_when_file_missing(tmp_path):
    """A path that does not exist produces the default title ``Tutorial``."""
    absent_path = tmp_path / "nonexistent.json"

    title = _doc_title_from_units(absent_path)

    assert title == "Tutorial"
35
+
36
+
37
def test_doc_title_fallback_when_malformed_json(tmp_path):
    """Content that is not valid JSON produces the default title ``Tutorial``."""
    broken_units = tmp_path / "tutorial.units.json"
    broken_units.write_text("this is not JSON", encoding="utf-8")

    title = _doc_title_from_units(broken_units)

    assert title == "Tutorial"
41
+
42
+
43
def test_doc_title_uses_concept_key(tmp_path):
    """Only the ``concept`` key supplies a title; a unit without it falls back to ``Tutorial``."""
    # The single unit carries "name" instead of "concept".
    payload = [{"name": "Something Else"}]
    units_json = tmp_path / "tutorial.units.json"
    units_json.write_text(json.dumps(payload), encoding="utf-8")

    title = _doc_title_from_units(units_json)

    assert title == "Tutorial"
48
+
49
+
50
+ # ── _load_all_lines — from units JSON lines field ────────────────────────────
51
+
52
+
53
def test_load_all_lines_from_units_json(tmp_path):
    """Entries under a unit's ``lines`` key are returned as ``DialogueLine`` objects, in order."""
    dialogue_entries = [
        {"speaker": "ALEX", "text": "What is an interface?", "unit_number": 1},
        {"speaker": "MAYA", "text": "A contract.", "unit_number": 1},
    ]
    units_json = tmp_path / "tutorial.units.json"
    units_json.write_text(
        json.dumps([{"concept": "Interfaces", "lines": dialogue_entries}]),
        encoding="utf-8",
    )

    loaded = _load_all_lines(units_json)

    assert len(loaded) == 2
    for entry in loaded:
        assert isinstance(entry, DialogueLine)
    assert [entry.speaker for entry in loaded] == ["ALEX", "MAYA"]
71
+
72
+
73
def test_load_all_lines_from_multiple_units(tmp_path):
    """Lines of several units are concatenated in unit order."""
    first_unit = {
        "concept": "Unit1",
        "lines": [
            {"speaker": "ALEX", "text": "Line from unit 1", "unit_number": 1},
        ],
    }
    second_unit = {
        "concept": "Unit2",
        "lines": [
            {"speaker": "MAYA", "text": "Line from unit 2", "unit_number": 2},
            {"speaker": "ALEX", "text": "Another unit 2 line", "unit_number": 2},
        ],
    }
    units_json = tmp_path / "tutorial.units.json"
    units_json.write_text(json.dumps([first_unit, second_unit]), encoding="utf-8")

    loaded = _load_all_lines(units_json)

    assert len(loaded) == 3
    # The first line comes from unit 1, the one after it from unit 2.
    assert loaded[0].unit_number == 1
    assert loaded[1].unit_number == 2
96
+
97
+
98
+ # ── _load_all_lines — fallback to tutorial.script.txt ───────────────────────
99
+
100
+
101
def test_load_all_lines_fallback_to_script_txt(tmp_path):
    """Units without a ``lines`` key make the loader read ``tutorial.script.txt`` instead."""
    units_json = tmp_path / "tutorial.units.json"
    units_json.write_text(json.dumps([{"concept": "Interfaces"}]), encoding="utf-8")

    # The script file sits in the same directory as the units JSON.
    script_body = (
        "ALEX: What is an interface?\n"
        "MAYA: A contract between a class and the world.\n"
    )
    (tmp_path / "tutorial.script.txt").write_text(script_body, encoding="utf-8")

    loaded = _load_all_lines(units_json)

    assert len(loaded) == 2
    assert loaded[0].speaker == "ALEX"
    assert loaded[1].speaker == "MAYA"
116
+
117
+
118
def test_load_all_lines_returns_empty_when_no_lines_no_script(tmp_path):
    """With no ``lines`` in the JSON and no script file on disk, nothing is loaded."""
    units_json = tmp_path / "tutorial.units.json"
    units_json.write_text(json.dumps([{"concept": "X"}]), encoding="utf-8")

    # Deliberately no tutorial.script.txt in tmp_path.
    loaded = _load_all_lines(units_json)

    assert loaded == []
124
+
125
+
126
def test_load_all_lines_script_txt_filters_non_speaker_lines(tmp_path):
    """Script lines from an unknown speaker (NARRATOR) are dropped; ALEX and MAYA are kept."""
    units_json = tmp_path / "tutorial.units.json"
    units_json.write_text(json.dumps([{"concept": "X"}]), encoding="utf-8")

    script_rows = [
        "NARRATOR: This line should be ignored.\n",
        "ALEX: Valid line.\n",
        " This is a blank-ish line.\n",
        "MAYA: Another valid line.\n",
    ]
    script_txt = tmp_path / "tutorial.script.txt"
    script_txt.write_text("".join(script_rows), encoding="utf-8")

    loaded = _load_all_lines(units_json)
    seen_speakers = {entry.speaker for entry in loaded}

    # NARRATOR is not a known speaker.
    assert "NARRATOR" not in seen_speakers
    assert "ALEX" in seen_speakers
    assert "MAYA" in seen_speakers
144
+
145
+
146
def test_load_all_lines_empty_units_json_list(tmp_path):
    """An empty units list yields an empty list of dialogue lines."""
    empty_units = tmp_path / "tutorial.units.json"
    empty_units.write_text("[]", encoding="utf-8")

    loaded = _load_all_lines(empty_units)

    assert loaded == []
151
+
152
+
153
def test_load_all_lines_script_assigns_units_sequentially(tmp_path):
    """Lines from script.txt should be distributed across units."""
    units_json = tmp_path / "tutorial.units.json"
    units_json.write_text(json.dumps([{"concept": "A"}, {"concept": "B"}]), encoding="utf-8")

    # 4 lines, 2 units → 2 per unit
    script_body = "ALEX: Line 1\nMAYA: Line 2\nALEX: Line 3\nMAYA: Line 4\n"
    (tmp_path / "tutorial.script.txt").write_text(script_body, encoding="utf-8")

    loaded = _load_all_lines(units_json)
    assert len(loaded) == 4

    distinct_units = {entry.unit_number for entry in loaded}
    # Only checks that at least one unit number was assigned.
    # NOTE(review): the scenario suggests two distinct units (1 and 2) are expected;
    # tighten to ``>= 2`` once that is confirmed against _load_all_lines.
    assert len(distinct_units) >= 1
171
+
172
+
173
+ # ── _mp3_duration ─────────────────────────────────────────────────────────────
174
+
175
+
176
def test_mp3_duration_returns_float_on_success(monkeypatch, tmp_path):
    """Numeric stdout from the patched ``subprocess.run`` is returned as a float."""
    audio_file = tmp_path / "unit_01.mp3"
    audio_file.touch()

    def fake_probe(cmd, **kwargs):
        # Successful process whose stdout is the duration in seconds.
        return subprocess.CompletedProcess(
            args=cmd, returncode=0, stdout=b"45.321\n", stderr=b""
        )

    monkeypatch.setattr(subprocess, "run", fake_probe)

    measured = _mp3_duration(audio_file)

    assert measured == pytest.approx(45.321, abs=0.001)
186
+
187
+
188
def test_mp3_duration_returns_zero_on_ffprobe_error(monkeypatch, tmp_path):
    """A ``FileNotFoundError`` from ``subprocess.run`` results in a duration of 0.0."""
    audio_file = tmp_path / "unit_01.mp3"
    audio_file.touch()

    def missing_binary(cmd, **kwargs):
        raise FileNotFoundError("ffprobe not found")

    monkeypatch.setattr(subprocess, "run", missing_binary)

    measured = _mp3_duration(audio_file)

    assert measured == 0.0
197
+
198
+
199
def test_mp3_duration_returns_zero_on_bad_output(monkeypatch, tmp_path):
    """Non-numeric stdout (``N/A``) results in a duration of 0.0."""
    audio_file = tmp_path / "unit_01.mp3"
    audio_file.touch()

    def unparsable_probe(cmd, **kwargs):
        # The process succeeds but prints something that is not a number.
        return subprocess.CompletedProcess(args=cmd, returncode=0, stdout=b"N/A\n", stderr=b"")

    monkeypatch.setattr(subprocess, "run", unparsable_probe)

    measured = _mp3_duration(audio_file)

    assert measured == 0.0
208
+
209
+
210
def test_mp3_duration_returns_zero_on_timeout(monkeypatch, tmp_path):
    """A ``subprocess.TimeoutExpired`` results in a duration of 0.0."""
    audio_file = tmp_path / "unit_01.mp3"
    audio_file.touch()

    def hanging_probe(cmd, **kwargs):
        raise subprocess.TimeoutExpired(cmd, 10)

    monkeypatch.setattr(subprocess, "run", hanging_probe)

    measured = _mp3_duration(audio_file)

    assert measured == 0.0
219
+
220
+
221
+ # ── _format_duration ─────────────────────────────────────────────────────────
222
+
223
+
224
def test_format_duration_zero():
    """Zero seconds is rendered as ``0:00``."""
    rendered = _format_duration(0)
    assert rendered == "0:00"
226
+
227
+
228
def test_format_duration_one_minute():
    """Exactly sixty seconds is rendered as ``1:00``."""
    rendered = _format_duration(60)
    assert rendered == "1:00"
230
+
231
+
232
def test_format_duration_90_seconds():
    """Ninety seconds is rendered as ``1:30``."""
    rendered = _format_duration(90)
    assert rendered == "1:30"
234
+
235
+
236
def test_format_duration_pads_seconds():
    """A single-digit seconds part is zero-padded: 65 s is ``1:05``."""
    rendered = _format_duration(65)
    assert rendered == "1:05"
238
+
239
+
240
def test_format_duration_over_one_hour():
    """Minutes are not rolled over into hours: 3661 s is ``61:01``."""
    rendered = _format_duration(3661)
    assert rendered == "61:01"
242
+
243
+
244
def test_format_duration_fractional_truncated():
    """Fractional seconds are cut off rather than rounded: 59.9 s is ``0:59``."""
    rendered = _format_duration(59.9)
    assert rendered == "0:59"
247
+
248
+
249
+ # ── _UNIT_MP3_RE ──────────────────────────────────────────────────────────────
250
+
251
+
252
def test_unit_mp3_re_matches_valid_stems():
    """File stems of the form ``unit_NN`` are accepted by the pattern."""
    from tutor.visual import _UNIT_MP3_RE

    for stem in ("unit_01", "unit_10", "unit_99"):
        assert _UNIT_MP3_RE.match(stem)
258
+
259
+
260
def test_unit_mp3_re_rejects_intro():
    """The intro stem ``unit_00_intro`` is not matched by the pattern."""
    from tutor.visual import _UNIT_MP3_RE

    intro_match = _UNIT_MP3_RE.match("unit_00_intro")
    assert intro_match is None
264
+
265
+
266
def test_unit_mp3_re_rejects_non_unit_names():
    """Stems that do not start with ``unit_`` are not matched by the pattern."""
    from tutor.visual import _UNIT_MP3_RE

    for stem in ("tutorial", "outro"):
        assert _UNIT_MP3_RE.match(stem) is None
tutor/tutor.py ADDED
@@ -0,0 +1,365 @@
1
+ import argparse
2
+ import asyncio
3
+ import io
4
+ import json
5
+ import logging
6
+ import shutil
7
+ import sys
8
+ from dataclasses import asdict
9
+ from functools import partial
10
+ from pathlib import Path
11
+ from typing import TYPE_CHECKING
12
+
13
+ if TYPE_CHECKING:
14
+ from tutor.player.player import TutorPlayer
15
+
16
+ from tutor import inspector
17
+ from tutor.audio import audio_builder
18
+ from tutor.config import preflight
19
+ from tutor.constants import (
20
+ DEFAULT_DIFFICULTY,
21
+ DEFAULT_DURATION_MIN,
22
+ DEFAULT_FORMAT,
23
+ DEFAULT_SUBJECT,
24
+ WPM,
25
+ )
26
+ from tutor.exceptions import TutorError
27
+ from tutor.generation import assembler, curriculum, dialogue, narrator
28
+ from tutor.infra import llm
29
+ from tutor.ingestion import chunker, doc_analyzer, summarizer
30
+ from tutor.models import Chunk, DialogueLine, DocProfile, TeachingUnit
31
+
32
+
33
def main() -> None:
    """Entry point for the ``tutor`` command line.

    ``tutor play ...`` is routed to ``_run_play``; every other invocation is
    parsed as a generation request and passed to ``cmd_generate``. A
    ``TutorError`` raised during generation is printed to stderr and the
    process exits with status 1.
    """
    # Force UTF-8 output on Windows so LLM-generated unicode (≠, →, etc.) doesn't crash.
    # Done here (not at module level) so pytest's stdout capture isn't affected.
    for stream_name in ("stdout", "stderr"):
        stream = getattr(sys, stream_name)
        if hasattr(stream, "reconfigure"):
            # Switch the encoding in place: this keeps the stream's existing
            # buffering mode, whereas a fresh TextIOWrapper defaults to
            # line_buffering=False and would hold back progress messages.
            stream.reconfigure(encoding="utf-8", errors="replace")
        elif hasattr(stream, "buffer"):
            # Streams without reconfigure() fall back to re-wrapping the raw buffer.
            wrapped = io.TextIOWrapper(stream.buffer, encoding="utf-8", errors="replace")
            setattr(sys, stream_name, wrapped)

    # Detect "play" subcommand before building the main parser — argparse
    # cannot handle a positional that is either a subcommand or a file path.
    if len(sys.argv) > 1 and sys.argv[1] == "play":
        _run_play()
        return

    parser = argparse.ArgumentParser(prog="tutor", description="Tutor AI — Java audio sessions")
    parser.add_argument("input", nargs="?", help="Path to input .md file")
    parser.add_argument("--output", default="tutorial.mp3")
    parser.add_argument("--provider", default="groq")
    parser.add_argument("--duration", type=int, default=DEFAULT_DURATION_MIN)
    parser.add_argument("--format", default=DEFAULT_FORMAT, dest="fmt")
    parser.add_argument("--difficulty", default=DEFAULT_DIFFICULTY)
    parser.add_argument("--units", type=int, default=None)
    parser.add_argument("--subject", default=DEFAULT_SUBJECT)
    parser.add_argument("--topic", default=None)
    parser.add_argument("--play", action="store_true")
    parser.add_argument("--script-only", action="store_true", dest="script_only")
    parser.add_argument("--dry-run", action="store_true", dest="dry_run")
    parser.add_argument("--inspect", action="store_true")
    parser.add_argument("--show-summaries", action="store_true", dest="show_summaries")
    parser.add_argument("--no-cache", action="store_true", dest="no_cache")
    parser.add_argument("--verbose", action="store_true")
    parser.add_argument("--debug", action="store_true")
    # --explain and --conversation select the pipeline style and cannot be combined.
    mode_group = parser.add_mutually_exclusive_group()
    mode_group.add_argument(
        "--explain",
        action="store_true",
        help="Narrate the document top-to-bottom (read-along mode)",
    )
    mode_group.add_argument(
        "--conversation", action="store_true", help="Concept-driven dialogue (default)"
    )

    args = parser.parse_args()
    _setup_logging(args)

    try:
        cmd_generate(args)
    except TutorError as e:
        print(f"\n✗ {e}", file=sys.stderr)
        sys.exit(1)
83
+
84
+
85
def _run_play() -> None:
    """Parse the arguments of ``tutor play`` and start playback.

    A ``TutorError`` raised by ``cmd_play`` is printed to stderr and the
    process exits with status 1.
    """
    play_parser = argparse.ArgumentParser(prog="tutor play")
    play_parser.add_argument("audio_file")
    play_parser.add_argument("--provider", default="groq")
    for flag, dest in (("--no-qa", "no_qa"), ("--verbose", "verbose"), ("--debug", "debug")):
        play_parser.add_argument(flag, action="store_true", dest=dest)

    # Skip the program name and the "play" token that main() dispatched on.
    play_args = play_parser.parse_args(sys.argv[2:])
    _setup_logging(play_args)

    try:
        cmd_play(play_args)
    except TutorError as err:
        print(f"\n✗ {err}", file=sys.stderr)
        sys.exit(1)
99
+
100
+
101
def cmd_generate(args: argparse.Namespace) -> None:
    """Run the generation pipeline described by the parsed arguments.

    Steps: preflight, optional cache wipe, document analysis and chunking,
    then either narration (``--explain``) or summarise → curriculum →
    dialogue. The run stops early for ``--inspect``, ``--dry-run`` (dialogue
    path only) and ``--script-only``; otherwise audio is built and, with
    ``--play``, the player is started afterwards.
    """
    config = preflight(args.input, args.provider, _mode(args))
    llm_fn = partial(llm.chat, provider=args.provider, config=config)

    if args.no_cache:
        cache_dir = Path(".tutor_cache")
        if cache_dir.exists():
            shutil.rmtree(cache_dir)
            # NOTE(review): the diff view this was reconstructed from lost its
            # indentation; confirm whether this message is meant to print only
            # when a cache directory was actually removed.
            print("Cache cleared (all summaries and dialogues will be regenerated).")

    profile = doc_analyzer.analyze(args.input)
    source_text = Path(args.input).read_text(encoding="utf-8")
    chunks = chunker.chunk(source_text, profile)

    if args.inspect:
        _run_inspect(args, profile, chunks)
        return

    if args.subject not in ("java", "general"):
        print(f"Warning: --subject {args.subject!r} is not supported yet; proceeding as 'general'.")

    # Human-readable title derived from the input file name.
    doc_title = Path(args.input).stem.replace("-", " ").replace("_", " ").title()

    if getattr(args, "explain", False):
        _save_chunks(chunks, args.output)
        units, all_lines = narrator.narrate_all(chunks, doc_title, llm_fn)
        script = assembler.assemble(units, all_lines, "explain", doc_title, mode="explain")
    else:
        print(f"Summarising {len(chunks)} chunk(s)...")
        chunks = summarizer.summarize_all(chunks, llm_fn)
        _save_chunks(chunks, args.output)

        print("Planning curriculum...")
        units = curriculum.plan(chunks, profile, args.duration, llm_fn, args.difficulty, args.topic)
        if args.units:
            # Keep only the first N planned units.
            units = units[: args.units]

        if args.dry_run:
            inspector.report_curriculum(units, chunks, args.duration)
            return

        print(f"Generating dialogue for {len(units)} unit(s)...")
        all_lines = []
        for position, unit in enumerate(units, 1):
            print(f" [{position}/{len(units)}] {unit.concept}")
            unit_lines = dialogue.generate(unit, chunks, args.fmt, llm_fn, args.difficulty)
            all_lines.append(unit_lines)
        script = assembler.assemble(units, all_lines, args.fmt, doc_title)

    _print_duration_estimate(script)

    if args.script_only:
        _run_script_only(script)
        return

    _run_audio(args, units, script)

    if getattr(args, "play", False):
        cmd_play(args)
160
+
161
+
162
def _run_inspect(args: argparse.Namespace, profile: DocProfile, chunks: list[Chunk]) -> None:
    """Print the ingestion report and, with ``--show-summaries``, each chunk's summary."""
    inspector.report_ingestion(profile, chunks)
    if not args.show_summaries:
        return
    for chunk in chunks:
        print(f"\n--- {chunk.chunk_id} ---\n{chunk.summary}")
167
+
168
+
169
def _run_script_only(script: list[DialogueLine]) -> None:
    """Print the script to stdout, one ``SPEAKER: text`` line per dialogue line."""
    for entry in script:
        speaker, text = entry.speaker, entry.text
        print(f"{speaker}: {text}")
172
+
173
+
174
def _save_chunks(chunks: list[Chunk], output_path: str) -> None:
    """Write the chunks as JSON to ``tutorial.chunks.json`` next to the output file.

    Args:
        chunks: Chunk dataclass instances; each is serialised with ``asdict``.
        output_path: Path of the main output file. Only its parent directory
            is used; that directory is created if it does not exist.
    """
    chunks_path = Path(output_path).parent / "tutorial.chunks.json"
    # Create the directory first so an --output path in a new folder does not
    # abort the run with FileNotFoundError.
    chunks_path.parent.mkdir(parents=True, exist_ok=True)
    with open(chunks_path, "w", encoding="utf-8") as f:
        json.dump([asdict(c) for c in chunks], f, indent=2, ensure_ascii=False)
178
+
179
+
180
def _run_audio(
    args: argparse.Namespace, units: list[TeachingUnit], script: list[DialogueLine]
) -> None:
    """Write the script file, build the audio, then save unit metadata.

    All artefacts live beside ``args.output``: the script takes the output
    name with a ``.script.txt`` suffix, per-unit audio goes to
    ``tutorial_units/`` and the unit metadata to ``tutorial.units.json``.
    """
    output = Path(args.output)
    out_dir = output.parent
    script_path = output.with_suffix(".script.txt")
    units_dir = str(out_dir / "tutorial_units")

    with open(script_path, "w", encoding="utf-8") as script_file:
        script_file.writelines(f"{entry.speaker}: {entry.text}\n" for entry in script)
    print(f"Script saved: {script_path}")
    print("Generating audio — this takes 2–4 minutes for a 20-min session...")

    asyncio.run(audio_builder.build(script, args.output, units_dir))

    # Metadata is written only after the audio build has returned.
    units_json_path = out_dir / "tutorial.units.json"
    serialised_units = [asdict(unit) for unit in units]
    with open(units_json_path, "w", encoding="utf-8") as meta_file:
        json.dump(serialised_units, meta_file, indent=2, ensure_ascii=False)

    print("\nDone.")
    print(f" Audio: {args.output}")
    print(f" Units: {units_dir}/")
    print(f" Script: {script_path}")
    print(f" Meta: {units_json_path}")
203
+
204
+
205
def cmd_play(args: argparse.Namespace) -> None:
    """Build a player from ``args`` and run it."""
    _build_player(args).run()
208
+
209
+
210
def _build_player(args: argparse.Namespace) -> "TutorPlayer":
    """Build and return a configured TutorPlayer without starting it.

    The units directory is resolved from ``args.audio_file`` when present
    (used directly if it is a directory, otherwise ``tutorial_units`` beside
    it) and from ``args.output`` otherwise. Unit and chunk metadata are read
    from ``tutorial.units.json`` / ``tutorial.chunks.json`` next to that
    directory; placeholder units are built from the mp3 file names when the
    units file is missing.

    Raises:
        PlayerError: if the units directory does not exist or holds no
            ``.mp3`` files.
    """
    import json
    from datetime import datetime, timezone
    from functools import partial as _partial

    from tutor.config import load_config
    from tutor.exceptions import PlayerError
    from tutor.infra import llm as _llm
    from tutor.models import Chunk, SessionLog, TeachingUnit
    from tutor.player.player import TutorPlayer

    _log = logging.getLogger(__name__)

    if hasattr(args, "audio_file"):
        audio_path = Path(args.audio_file)
        units_dir = audio_path if audio_path.is_dir() else audio_path.parent / "tutorial_units"
    else:
        units_dir = Path(args.output).parent / "tutorial_units"

    if not units_dir.exists():
        raise PlayerError(
            f"tutorial_units/ not found at {units_dir}.\n Run generation first or use /generate."
        )

    unit_files = sorted(units_dir.glob("*.mp3"))
    if not unit_files:
        raise PlayerError(f"No .mp3 files found in {units_dir}")

    units_json = units_dir.parent / "tutorial.units.json"
    if units_json.exists():
        with open(units_json, encoding="utf-8") as f:
            raw_units = json.load(f)
        for u in raw_units:
            # Fill in fields that a stored units file may not contain.
            u.setdefault("prerequisite_concepts", [])
            u.setdefault("js_contrast", "")
            u.setdefault("production_relevance", "")
        units = [TeachingUnit(**u) for u in raw_units]
    else:
        # No metadata on disk: derive one placeholder unit per mp3 file.
        units = [
            TeachingUnit(
                unit=i,
                concept=f.stem.replace("_", " ").title(),
                source_sections=[],
                complexity=2,
                word_budget=400,
                key_facts=[],
                common_misconception="",
                good_analogy="",
                question_style="recall",
                memory_hook="",
            )
            for i, f in enumerate(unit_files)
        ]

    chunks_path = units_dir.parent / "tutorial.chunks.json"
    if chunks_path.exists():
        with open(chunks_path, encoding="utf-8") as f:
            raw_chunks = json.load(f)
        chunks = [Chunk(**c) for c in raw_chunks]
    else:
        chunks = []
        _log.warning("tutorial.chunks.json not found — Q&A will work without source context")

    no_qa = getattr(args, "no_qa", False)
    provider = getattr(args, "provider", "groq")
    if no_qa:
        llm_fn = None
    else:
        try:
            config = load_config()
            llm_fn = _partial(_llm.chat, provider=provider, config=config)
        except Exception:
            # Best effort: playback continues without Q&A when config loading fails.
            llm_fn = None
            _log.warning("Could not load config for Q&A — Q&A will be unavailable")
            # Keep the cause available for troubleshooting without cluttering normal output.
            _log.debug("Q&A config load failed", exc_info=True)

    # datetime.utcnow() is deprecated; take an aware UTC time and drop the
    # tzinfo so the stored ISO string keeps the same naive format as before.
    session_start = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    session = SessionLog(
        source_file=str(getattr(args, "audio_file", getattr(args, "output", "unknown"))),
        session_start=session_start,
        format="tutor-student",
        duration_minutes=20,
    )

    return TutorPlayer(
        unit_files=[str(f) for f in unit_files],
        units=units,
        chunks=chunks,
        session=session,
        llm_fn=llm_fn,
        no_qa=no_qa,
    )
301
+
302
+
303
def _make_generate_parser() -> argparse.ArgumentParser:
    """Return an ArgumentParser for the /generate shell command."""
    generate_parser = argparse.ArgumentParser(prog="generate", add_help=False)
    add = generate_parser.add_argument

    add("input", nargs="?")
    add("--output", default="tutorial.mp3")
    add("--provider", default="groq")
    add("--duration", type=int, default=DEFAULT_DURATION_MIN)
    add("--format", default=DEFAULT_FORMAT, dest="fmt")
    add("--difficulty", default=DEFAULT_DIFFICULTY)
    add("--units", type=int, default=None)
    add("--subject", default=DEFAULT_SUBJECT)
    add("--topic", default=None)

    # Plain on/off switches; argparse derives the dest by turning "-" into "_".
    switches = (
        "--script-only",
        "--dry-run",
        "--inspect",
        "--show-summaries",
        "--no-cache",
        "--verbose",
        "--debug",
    )
    for switch in switches:
        add(switch, action="store_true")

    # --explain and --conversation cannot be given together.
    exclusive_modes = generate_parser.add_mutually_exclusive_group()
    for mode_flag in ("--explain", "--conversation"):
        exclusive_modes.add_argument(mode_flag, action="store_true")

    return generate_parser
326
+
327
+
328
+ def _mode(args: argparse.Namespace) -> str:
329
+ if getattr(args, "inspect", False):
330
+ return "inspect"
331
+ if getattr(args, "dry_run", False):
332
+ return "dry-run"
333
+ if getattr(args, "script_only", False):
334
+ return "script-only"
335
+ return "generate"
336
+
337
+
338
def _print_duration_estimate(script: list[DialogueLine]) -> None:
    """Print the script's word count and an estimated running time.

    The estimate is the speaking time at ``WPM`` words per minute plus a
    fixed 80-second allowance for pauses, truncated to whole seconds.
    """
    word_count = sum(len(entry.text.split()) for entry in script)
    spoken_seconds = (word_count / WPM) * 60
    pause_seconds = 80
    minutes, seconds = divmod(int(spoken_seconds + pause_seconds), 60)

    print("\n=== Duration Estimate ===")
    print(f"Script words: {word_count:,}")
    print(f"Estimated: ~{minutes}m {seconds:02d}s (incl. pauses)")
347
+
348
+
349
+ def _setup_logging(args: argparse.Namespace) -> None:
350
+ if getattr(args, "debug", False):
351
+ level = logging.DEBUG
352
+ logging.basicConfig(
353
+ level=level,
354
+ filename="tutor.log",
355
+ format="%(asctime)s %(name)s %(levelname)s %(message)s",
356
+ )
357
+ elif getattr(args, "verbose", False):
358
+ level = logging.INFO
359
+ logging.basicConfig(level=level, format="%(levelname)s %(message)s")
360
+ else:
361
+ logging.basicConfig(level=logging.WARNING, format="%(levelname)s %(message)s")
362
+
363
+
364
+ if __name__ == "__main__":
365
+ main()