learnx-cli 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. learnx_cli-0.3.0.dist-info/METADATA +240 -0
  2. learnx_cli-0.3.0.dist-info/RECORD +131 -0
  3. learnx_cli-0.3.0.dist-info/WHEEL +4 -0
  4. learnx_cli-0.3.0.dist-info/entry_points.txt +2 -0
  5. tutor/.env copy.example +4 -0
  6. tutor/__init__.py +0 -0
  7. tutor/__main__.py +4 -0
  8. tutor/assets/__init__.py +5 -0
  9. tutor/assets/html/fonts/Inter-Bold.woff2 +0 -0
  10. tutor/assets/html/fonts/Inter-Regular.woff2 +0 -0
  11. tutor/assets/html/fonts/Inter-SemiBold.woff2 +0 -0
  12. tutor/assets/html/fonts/JetBrainsMono-Regular.woff2 +0 -0
  13. tutor/assets/html/highlight-java.min.js +2 -0
  14. tutor/assets/html/highlight-javascript.min.js +2 -0
  15. tutor/assets/html/highlight-python.min.js +2 -0
  16. tutor/assets/html/highlight.min.js +17 -0
  17. tutor/assets/html/mermaid.min.js +31 -0
  18. tutor/assets/html/slide_base.css +464 -0
  19. tutor/assets/html/theme-learnx-dark.css +12 -0
  20. tutor/audio/__init__.py +0 -0
  21. tutor/audio/audio_builder.py +143 -0
  22. tutor/audio/sanitizer.py +9 -0
  23. tutor/audio/tts_renderer.py +54 -0
  24. tutor/cli/__init__.py +0 -0
  25. tutor/cli/commands.py +391 -0
  26. tutor/cli/logo.py +21 -0
  27. tutor/cli/playback_commands.py +239 -0
  28. tutor/cli/shell.py +91 -0
  29. tutor/cli/shell_context.py +18 -0
  30. tutor/cli/theme.py +39 -0
  31. tutor/cli/video_commands.py +123 -0
  32. tutor/config.py +122 -0
  33. tutor/conftest.py +5 -0
  34. tutor/constants.py +82 -0
  35. tutor/exceptions.py +26 -0
  36. tutor/generation/__init__.py +0 -0
  37. tutor/generation/assembler.py +81 -0
  38. tutor/generation/curriculum.py +97 -0
  39. tutor/generation/dialogue.py +172 -0
  40. tutor/generation/narrator.py +122 -0
  41. tutor/generation/segment_parser.py +223 -0
  42. tutor/generation/segment_planner.py +200 -0
  43. tutor/generation/visual_planner.py +205 -0
  44. tutor/infra/__init__.py +0 -0
  45. tutor/infra/llm.py +152 -0
  46. tutor/ingestion/__init__.py +0 -0
  47. tutor/ingestion/chunker.py +171 -0
  48. tutor/ingestion/doc_analyzer.py +41 -0
  49. tutor/ingestion/parse_content.py +19 -0
  50. tutor/ingestion/summarizer.py +51 -0
  51. tutor/inspector.py +117 -0
  52. tutor/llm_config.toml +58 -0
  53. tutor/models.py +147 -0
  54. tutor/player/__init__.py +0 -0
  55. tutor/player/input_handler.py +45 -0
  56. tutor/player/player.py +308 -0
  57. tutor/player/player_display.py +117 -0
  58. tutor/prompts/curriculum.txt +67 -0
  59. tutor/prompts/dialogue.txt +62 -0
  60. tutor/prompts/narrate.txt +34 -0
  61. tutor/prompts/qa.txt +17 -0
  62. tutor/prompts/summarize.txt +9 -0
  63. tutor/prompts/visual.txt +60 -0
  64. tutor/prompts/visual_v3.txt +91 -0
  65. tutor/qa/__init__.py +0 -0
  66. tutor/qa/qa.py +105 -0
  67. tutor/requirements-dev.txt +2 -0
  68. tutor/requirements.txt +12 -0
  69. tutor/sample_docs/headingless_large.md +1 -0
  70. tutor/sample_docs/headingless_test.md +1 -0
  71. tutor/sample_docs/java-basics.md +78 -0
  72. tutor/tests/__init__.py +0 -0
  73. tutor/tests/audio/__init__.py +0 -0
  74. tutor/tests/audio/test_audio_builder.py +106 -0
  75. tutor/tests/audio/test_sanitizer.py +41 -0
  76. tutor/tests/cli/__init__.py +0 -0
  77. tutor/tests/cli/test_commands.py +67 -0
  78. tutor/tests/cli/test_video_commands.py +190 -0
  79. tutor/tests/e2e/README.md +61 -0
  80. tutor/tests/e2e/__init__.py +0 -0
  81. tutor/tests/e2e/conftest.py +117 -0
  82. tutor/tests/e2e/fixtures/README.md +17 -0
  83. tutor/tests/e2e/fixtures/sample.md +13 -0
  84. tutor/tests/e2e/test_audio_quality.py +40 -0
  85. tutor/tests/e2e/test_av_sync.py +56 -0
  86. tutor/tests/e2e/test_pipeline_smoke.py +37 -0
  87. tutor/tests/e2e/test_slide_render.py +72 -0
  88. tutor/tests/e2e/test_video_streams.py +104 -0
  89. tutor/tests/generation/__init__.py +0 -0
  90. tutor/tests/generation/conftest.py +134 -0
  91. tutor/tests/generation/test_assembler.py +64 -0
  92. tutor/tests/generation/test_curriculum.py +107 -0
  93. tutor/tests/generation/test_narrator.py +165 -0
  94. tutor/tests/generation/test_segment_edge_cases.py +280 -0
  95. tutor/tests/generation/test_segment_planner.py +324 -0
  96. tutor/tests/generation/test_visual_planner.py +319 -0
  97. tutor/tests/ingestion/__init__.py +0 -0
  98. tutor/tests/ingestion/test_chunker.py +94 -0
  99. tutor/tests/ingestion/test_doc_analyzer.py +51 -0
  100. tutor/tests/player/__init__.py +0 -0
  101. tutor/tests/player/test_player_states.py +88 -0
  102. tutor/tests/test_assets.py +39 -0
  103. tutor/tests/test_models_visual.py +180 -0
  104. tutor/tests/visual/__init__.py +0 -0
  105. tutor/tests/visual/test_beat_timer.py +321 -0
  106. tutor/tests/visual/test_pipeline_integration.py +178 -0
  107. tutor/tests/visual/test_slide_renderer.py +298 -0
  108. tutor/tests/visual/test_subtitle_writer.py +165 -0
  109. tutor/tests/visual/test_video_assembler.py +108 -0
  110. tutor/tests/visual/test_visual_pipeline.py +270 -0
  111. tutor/tutor.py +365 -0
  112. tutor/visual/__init__.py +213 -0
  113. tutor/visual/beat_timer.py +222 -0
  114. tutor/visual/slide_renderer.py +236 -0
  115. tutor/visual/subtitle_writer.py +187 -0
  116. tutor/visual/templates/_base.html.j2 +40 -0
  117. tutor/visual/templates/analogy.html.j2 +21 -0
  118. tutor/visual/templates/callout.html.j2 +10 -0
  119. tutor/visual/templates/code_example.html.j2 +12 -0
  120. tutor/visual/templates/comparison.html.j2 +28 -0
  121. tutor/visual/templates/decision_guide.html.j2 +37 -0
  122. tutor/visual/templates/definition.html.j2 +13 -0
  123. tutor/visual/templates/diagram.html.j2 +11 -0
  124. tutor/visual/templates/hook_question.html.j2 +17 -0
  125. tutor/visual/templates/key_insight.html.j2 +9 -0
  126. tutor/visual/templates/memory_hook.html.j2 +7 -0
  127. tutor/visual/templates/outro.html.j2 +16 -0
  128. tutor/visual/templates/question_prompt.html.j2 +13 -0
  129. tutor/visual/templates/step_sequence.html.j2 +14 -0
  130. tutor/visual/templates/title_card.html.j2 +12 -0
  131. tutor/visual/video_assembler.py +299 -0
tutor/cli/shell.py ADDED
@@ -0,0 +1,91 @@
1
+ import sys
2
+
3
+ from tutor.cli import theme
4
+ from tutor.cli.commands import COMMAND_MAP, ShellContext
5
+ from tutor.cli.logo import print_welcome
6
+ from tutor.cli.video_commands import cmd_video, cmd_vsessions
7
+
8
+ COMMAND_MAP["/video"] = cmd_video
9
+ COMMAND_MAP["/vsessions"] = cmd_vsessions
10
+
11
+
12
def _build_prompt(ctx: ShellContext) -> str:
    """Return the input prompt string for the current shell state.

    With no player, or a player whose state is ``"STOPPED"``, the prompt is
    the bare coloured ``LearnX`` tag.  Otherwise a status icon is shown and,
    while the player's current index still points at a unit, the 1-based
    position and the first 22 characters of that unit's concept.
    """
    brand = f"{theme.CYAN}LearnX{theme.RESET}"
    player = ctx.player
    if player is None or player._state == "STOPPED":
        return f"{brand} > "

    status_icons = {"PLAYING": "▶", "PAUSED": "⏸", "ASKING": "?", "ANSWERING": "⟳"}
    icon = status_icons.get(player._state, "·")  # "·" for any unlisted state

    position = player._current_idx
    total = len(player.units)
    if position >= total:
        # Index has run past the last unit: only the icon is meaningful.
        return f"{brand} [{icon}] > "

    concept = player.units[position].concept[:22]
    return f"{brand} [{icon} {position + 1}/{total} {concept}] > "
23
+
24
+
25
def run_shell() -> None:
    """Run the interactive LearnX read-eval loop until the user leaves.

    Each non-empty line is either forwarded to ``cmd_ask`` (bare text while a
    player exists and is not stopped) or dispatched through ``COMMAND_MAP``.
    A ``None`` entry in the map, Ctrl-C, or end-of-input all end the loop
    after ``_graceful_exit`` has run.
    """
    _setup_utf8()
    _prime_ffmpeg()
    print_welcome()
    ctx = ShellContext()

    while True:
        try:
            line = input(_build_prompt(ctx)).strip()
        except (KeyboardInterrupt, EOFError):
            print()  # finish the interrupted prompt line
            _graceful_exit(ctx)
            return

        if not line:
            continue

        words = line.split()

        # Bare text while a session is active → route as /ask
        session_active = ctx.player and ctx.player._state not in ("STOPPED", None)
        if not line.startswith("/") and session_active:
            from tutor.cli.commands import cmd_ask

            cmd_ask(words, ctx)
            continue

        # ``line`` is non-empty after strip(), so ``words`` has at least one item.
        first, *tokens = words
        cmd = first.lower()

        try:
            handler = COMMAND_MAP[cmd]
        except KeyError:
            print(theme.yellow(f" Unknown command: {cmd} — type /help for a list."))
            continue

        if handler is None:
            # A None handler is the map's way of marking an exit command.
            _graceful_exit(ctx)
            return

        handler(tokens, ctx)
63
+
64
+
65
def _graceful_exit(ctx: ShellContext) -> None:
    """Stop any running player, wait briefly for its thread, and say goodbye.

    The player's ``_quit`` is only called when a player exists and its state
    is not already ``"STOPPED"``; the thread join is capped at two seconds.
    """
    player = ctx.player
    if player and player._state != "STOPPED":
        player._quit()

    worker = ctx.player_thread
    if worker:
        worker.join(timeout=2.0)

    print(theme.dim(" Goodbye!\n"))
71
+
72
+
73
def _prime_ffmpeg() -> None:
    """Best-effort ffmpeg discovery at shell start-up.

    Delegates to ``tutor.config._check_ffmpeg``.  Every failure -- including
    a failing import of that module -- is deliberately ignored so the shell
    still starts.  Per the original author's note, the point of running this
    early is to get ffmpeg onto PATH before pydub is imported so that pydub's
    warning does not fire.
    """
    try:
        from tutor.config import _check_ffmpeg
    except Exception:
        return

    try:
        _check_ffmpeg()
    except Exception:
        pass
81
+
82
+
83
+ def _setup_utf8() -> None:
84
+ if hasattr(sys.stdout, "buffer"):
85
+ import io
86
+
87
+ sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
88
+ if hasattr(sys.stderr, "buffer"):
89
+ import io
90
+
91
+ sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace")
@@ -0,0 +1,18 @@
1
+ from __future__ import annotations
2
+
3
+ import threading
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING
7
+
8
+ if TYPE_CHECKING:
9
+ from tutor.player.player import TutorPlayer
10
+
11
+
12
@dataclass
class ShellContext:
    """Mutable state carried between shell commands.

    Every field starts as ``None`` and is filled in by command handlers.
    """

    # Active player instance, if any.
    player: TutorPlayer | None = None
    # Thread associated with the player; joined on shell exit.
    player_thread: threading.Thread | None = None
    # Directory of the most recently used units, if recorded.
    last_units_dir: Path | None = None
    # Name of the session currently in focus.
    current_session: str | None = None
    # Result of the most recent video pipeline run.
    last_video: Path | None = None
tutor/cli/theme.py ADDED
@@ -0,0 +1,39 @@
1
+ import os
2
+ import sys
3
+
4
# On Windows an empty shell command is issued once at import time; the
# original author relies on this to enable ANSI escape codes on Windows 10+.
if sys.platform == "win32":
    os.system("")

# Text attributes
RESET = "\033[0m"
BOLD = "\033[1m"
DIM = "\033[2m"

# Bright foreground colours
RED = "\033[91m"
GREEN = "\033[92m"
YELLOW = "\033[93m"
CYAN = "\033[96m"


def _paint(code: str, s: str) -> str:
    """Return *s* prefixed with the escape *code* and followed by ``RESET``."""
    return f"{code}{s}{RESET}"


def red(s: str) -> str:
    """Wrap *s* in the bright-red escape sequence."""
    return _paint(RED, s)


def green(s: str) -> str:
    """Wrap *s* in the bright-green escape sequence."""
    return _paint(GREEN, s)


def yellow(s: str) -> str:
    """Wrap *s* in the bright-yellow escape sequence."""
    return _paint(YELLOW, s)


def cyan(s: str) -> str:
    """Wrap *s* in the bright-cyan escape sequence."""
    return _paint(CYAN, s)


def bold(s: str) -> str:
    """Wrap *s* in the bold attribute."""
    return _paint(BOLD, s)


def dim(s: str) -> str:
    """Wrap *s* in the dim attribute."""
    return _paint(DIM, s)
@@ -0,0 +1,123 @@
1
+ """
2
+ Shell command handlers for the video pipeline.
3
+ Separate from commands.py so the audio pipeline file stays under 400 lines.
4
+ """
5
+
6
+ import logging
7
+ from functools import partial
8
+ from pathlib import Path
9
+
10
+ from tutor.cli import theme
11
+ from tutor.cli.commands import AUDIO_DIR, ShellContext
12
+
13
+ log = logging.getLogger(__name__)
14
+
15
+ VIDEO_DIR = Path("video")
16
+
17
+
18
def cmd_video(tokens: list[str], ctx: ShellContext) -> None:
    """Usage: /video [session-name]
    Generate MP4 video for a session. Requires /generate to have run first."""
    # Session name: explicit argument first, then the shell's current session.
    if tokens:
        session = tokens[0]
    elif ctx.current_session:
        session = ctx.current_session
    else:
        print(theme.red(" Usage: /video <session-name>"))
        return

    # Refuse early when the audio artefacts the video step needs are missing.
    try:
        _assert_audio_ready(AUDIO_DIR / session)
    except Exception as err:
        print(theme.red(f" Error: {err}"))
        return

    # An existing video is only replaced after an explicit "y".
    mp4_path = VIDEO_DIR / session / "full_session.mp4"
    if mp4_path.exists() and not _confirm_overwrite(mp4_path):
        print(theme.dim(" Skipped."))
        return

    try:
        _run_video_pipeline(session, ctx)
    except KeyboardInterrupt:
        print(theme.yellow("\n Cancelled."))
    except Exception as err:
        # Show a short message to the user; the traceback goes to the log.
        print(theme.red(f"\n Error: {err}\n"))
        log.exception("Video pipeline failed for session %s", session)
50
+
51
+
52
def cmd_vsessions(tokens: list[str], ctx: ShellContext) -> None:
    """Usage: /vsessions — list sessions that have a completed video."""
    if not VIDEO_DIR.exists():
        print(theme.dim(" No video sessions yet. Use /video <session> to create one."))
        return

    # A session counts as completed once its full_session.mp4 exists.
    sessions = sorted(
        d for d in VIDEO_DIR.iterdir() if d.is_dir() and (d / "full_session.mp4").exists()
    )
    if not sessions:
        print(theme.dim(" No completed videos yet. Use /video <session-name>."))
        return

    print()
    for s in sessions:
        mp4 = s / "full_session.mp4"
        size_mb = mp4.stat().st_size / 1_048_576  # bytes -> MiB
        # Pad the plain name BEFORE colouring: the format width counts the
        # invisible ANSI escape characters too, which would otherwise shrink
        # the visible column and misalign longer session names.
        name_col = theme.cyan(f"{s.name:<30}")
        print(f" {name_col} {theme.green('[mp4]')} {size_mb:.0f} MB")
    print(theme.dim("\n Play with your video player: vlc video/<session>/full_session.mp4"))
    print()
72
+
73
+
74
def _run_video_pipeline(session: str, ctx: ShellContext) -> None:
    """Run the visual pipeline for *session* and record the result on *ctx*.

    The output directory ``VIDEO_DIR / session`` is created if needed.  The
    LLM callable is ``tutor.infra.llm.chat`` bound to the "groq" provider and
    the loaded config; difficulty is fixed at "beginner".  On success
    ``ctx.last_video`` receives the pipeline's return value and
    ``ctx.current_session`` is set to *session*.
    """
    # Imported lazily, inside the function, as in the original module.
    from tutor.config import load_config
    from tutor.infra import llm as _llm
    from tutor.visual import run_visual_pipeline

    llm_fn = partial(_llm.chat, provider="groq", config=load_config())

    video_dir = VIDEO_DIR / session
    video_dir.mkdir(parents=True, exist_ok=True)

    audio_dir = AUDIO_DIR / session
    unit_count = sum(1 for _ in (audio_dir / "tutorial_units").glob("unit_*.mp3"))
    print(f"\n Resolving session {theme.bold(session)}...")
    print(f" Found {unit_count} units. Starting visual pipeline.")

    ctx.last_video = run_visual_pipeline(
        session, audio_dir, video_dir, llm_fn, difficulty="beginner"
    )
    ctx.current_session = session
94
+
95
+
96
+ def _assert_audio_ready(audio_session_dir: Path) -> None:
97
+ """Raise ValueError if the audio session is not ready."""
98
+ if not audio_session_dir.exists():
99
+ raise ValueError(
100
+ f"Session '{audio_session_dir.name}' not found in {AUDIO_DIR}/.\n"
101
+ " Run /generate first to produce audio."
102
+ )
103
+ units_json = audio_session_dir / "tutorial.units.json"
104
+ if not units_json.exists():
105
+ raise ValueError(
106
+ f"tutorial.units.json not found in {audio_session_dir}.\n"
107
+ " The audio pipeline must complete before running /video."
108
+ )
109
+ mp3s = list((audio_session_dir / "tutorial_units").glob("unit_*.mp3"))
110
+ if not mp3s:
111
+ raise ValueError(
112
+ f"No MP3 files found in {audio_session_dir / 'tutorial_units'}.\n"
113
+ " Run /generate (without --script-only) to produce audio."
114
+ )
115
+
116
+
117
+ def _confirm_overwrite(mp4_path: Path) -> bool:
118
+ """Prompt if full_session.mp4 already exists. Returns True to proceed."""
119
+ try:
120
+ answer = input(" Session already has a video. Regenerate? [y/N]: ").strip().lower()
121
+ return answer == "y"
122
+ except (EOFError, KeyboardInterrupt):
123
+ return False
tutor/config.py ADDED
@@ -0,0 +1,122 @@
1
+ import os
2
+ import subprocess
3
+ import sys
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+
7
+ from dotenv import load_dotenv
8
+
9
+ from tutor.exceptions import ConfigError
10
+
11
+
12
@dataclass
class Config:
    """Runtime settings for the LLM back-ends.

    API keys default to the empty string, which the preflight checks treat
    as "not set".
    """

    # Key for the "groq" provider.
    groq_api_key: str = ""
    # Key for the "openrouter" provider.
    openrouter_api_key: str = ""
    # Provider name used when none is chosen explicitly.
    default_provider: str = "groq"
17
+
18
+
19
def load_config() -> Config:
    """Build a ``Config`` from the environment.

    The ``.env`` file sitting next to this module is passed to
    ``load_dotenv`` first; the two API keys are then read from the process
    environment, defaulting to the empty string when unset.
    """
    env_file = Path(__file__).parent / ".env"
    load_dotenv(env_file)

    groq_key = os.environ.get("GROQ_API_KEY", "")
    openrouter_key = os.environ.get("OPENROUTER_API_KEY", "")
    return Config(groq_api_key=groq_key, openrouter_api_key=openrouter_key)
25
+
26
+
27
def preflight(input_path: str, provider: str, mode: str) -> Config:
    """Validate inputs and environment before a run, returning the loaded config.

    Checks run in this order: an input path is present (unless *mode* is
    "inspect", "dry-run" or "script-only"); the input exists and ends in
    ``.md``; the API key for *provider* is set; the input's parent directory
    is writable (same mode exemption); and, for mode "generate", ffmpeg is
    available.

    Note that *input_path* may be ``None`` despite its annotation; the
    existence and suffix checks are skipped in that case.

    Raises:
        ConfigError: on the first failing check, with a user-facing message.
    """
    config = load_config()

    # These modes are exempt from the input-required and writability checks.
    exempt_modes = ("inspect", "dry-run", "script-only")
    needs_full_run = mode not in exempt_modes

    if input_path is None:
        if needs_full_run:
            raise ConfigError(
                "No input file specified.\n Usage: python tutor.py <input.md> [options]"
            )
    else:
        source = Path(input_path)
        if not source.exists():
            raise ConfigError(
                f"Input file not found: {input_path}\n Check the path and try again."
            )
        if source.suffix.lower() != ".md":
            raise ConfigError(
                f"Input file must be a .md file, got: {source.suffix}\n Only Markdown files are supported."
            )

    if provider == "groq" and not config.groq_api_key:
        raise ConfigError(
            "GROQ_API_KEY not set.\n"
            " Add it to tutor/.env: GROQ_API_KEY=gsk_...\n"
            " Get a free key at: console.groq.com"
        )

    if provider == "openrouter" and not config.openrouter_api_key:
        raise ConfigError(
            "OPENROUTER_API_KEY not set.\n"
            " Add it to tutor/.env: OPENROUTER_API_KEY=sk-or-...\n"
            " Sign up at: openrouter.ai"
        )

    if needs_full_run and input_path:
        # Writability is checked on the directory that holds the input file.
        out_parent = Path(input_path).parent
        if not os.access(out_parent, os.W_OK):
            raise ConfigError(
                f"Output directory is not writable: {out_parent}\n"
                " Check permissions or specify a different --output path."
            )

    if mode == "generate":
        _check_ffmpeg()

    return config
73
+
74
+
75
def _check_ffmpeg() -> None:
    """Ensure an ffmpeg executable is reachable, or raise ``ConfigError``.

    First runs ``ffmpeg -version`` through PATH.  If that fails, and only on
    Windows, a list of common install locations is probed and the first hit
    has its directory injected into PATH via ``_inject_ffmpeg``.

    Raises:
        ConfigError: if ffmpeg is neither runnable from PATH nor found in
            any probed location.
    """
    try:
        subprocess.run(
            ["ffmpeg", "-version"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )
        return
    except (OSError, subprocess.CalledProcessError):
        # OSError covers FileNotFoundError as well as e.g. PermissionError
        # for a non-executable "ffmpeg" on PATH; all mean "not usable".
        pass

    # PATH doesn't have ffmpeg — probe common Windows install layouts.
    if sys.platform == "win32":
        _winget_pkgs = Path.home() / "AppData/Local/Microsoft/WinGet/Packages"
        candidates: list[Path] = [
            Path("C:/ffmpeg/bin/ffmpeg.exe"),
            *Path("C:/ffmpeg").glob("*/bin/ffmpeg.exe"),
            Path("C:/Program Files/ffmpeg/bin/ffmpeg.exe"),
            *Path("C:/Program Files/ffmpeg").glob("*/bin/ffmpeg.exe"),
            Path("C:/tools/ffmpeg/bin/ffmpeg.exe"),
            *(_winget_pkgs.glob("Gyan.FFmpeg*/*/bin/ffmpeg.exe") if _winget_pkgs.exists() else []),
            *(_winget_pkgs.glob("*/ffmpeg*/bin/ffmpeg.exe") if _winget_pkgs.exists() else []),
        ]
        for candidate in candidates:
            if candidate.exists():
                _inject_ffmpeg(candidate.parent)
                return

    raise ConfigError(
        "ffmpeg not found in PATH.\n"
        " Install with: winget install ffmpeg\n"
        " Or add its bin\\ folder to your system PATH and restart the terminal."
    )
109
+
110
+
111
+ def _inject_ffmpeg(bin_dir: Path) -> None:
112
+ """Add a discovered ffmpeg directory to the process PATH so pydub finds it."""
113
+ os.environ["PATH"] = str(bin_dir) + os.pathsep + os.environ.get("PATH", "")
114
+ # Patch pydub's runtime converter path if pydub is already imported.
115
+ try:
116
+ import pydub
117
+
118
+ pydub.AudioSegment.converter = str(bin_dir / "ffmpeg.exe")
119
+ pydub.AudioSegment.ffmpeg = str(bin_dir / "ffmpeg.exe")
120
+ pydub.AudioSegment.ffprobe = str(bin_dir / "ffprobe.exe")
121
+ except Exception:
122
+ pass
tutor/conftest.py ADDED
@@ -0,0 +1,5 @@
1
+ import sys
2
+ from pathlib import Path
3
+
4
# Put the directory that contains the ``tutor`` package first on sys.path so
# that `from tutor.x import y` works when pytest is run from inside tutor/.
_PACKAGE_PARENT = Path(__file__).parent.parent
sys.path.insert(0, str(_PACKAGE_PARENT))
tutor/constants.py ADDED
@@ -0,0 +1,82 @@
1
+ # Audio
2
+ WPM = 130
3
+ SILENCE_BREATH_MS = 150
4
+ SILENCE_TURN_MS = 500
5
+ SILENCE_UNIT_MS = 1200
6
+ SILENCE_SESSION_MS = 800
7
+ TTS_SEMAPHORE_LIMIT = 8
8
+
9
+ # Voices
10
+ VOICE_TUTOR = "en-US-GuyNeural"
11
+ VOICE_STUDENT = "en-US-JennyNeural"
12
+ VOICE_COTUTOR = "en-US-SaraNeural"
13
+ RATE_TUTOR = "+0%"
14
+ RATE_STUDENT = "+5%"
15
+ RATE_COTUTOR = "+0%"
16
+
17
+ # Ingestion
18
+ STRATEGY_A_TOKEN_LIMIT = 6_000
19
+ STRATEGY_B_TOKEN_LIMIT = 60_000
20
+ MAX_CHUNK_TOKENS = 4_000
21
+ MIN_CHUNK_TOKENS = 50
22
+ SUMMARY_CACHE_DIR = ".tutor_cache"
23
+ STRATEGY_C_WINDOW_TOKENS = 2_000
24
+ STRATEGY_C_OVERLAP_TOKENS = 200
25
+
26
+ # Complexity
27
+ WORDS_PER_COMPLEXITY: dict[int, int] = {1: 200, 2: 380, 3: 580}
28
+ OVERHEAD_WORDS = 200 # intro + transitions + outro
29
+
30
+ # Player
31
+ PLAYER_POLL_HZ = 10
32
+ PLAYER_BAR_WIDTH = 40
33
+
34
+ # Difficulty
35
+ DIFFICULTY_CONTEXT: dict[str, str] = {
36
+ "beginner": (
37
+ "The student has never written Java before. "
38
+ "Prioritise Tier 0-2 concepts. Analogies are mandatory. "
39
+ "Set max complexity to 2. Word budget multiplier: 1.3."
40
+ ),
41
+ "intermediate": (
42
+ "The student has written Java for 3 months. "
43
+ "Assume JVM basics are known. Use Tier 1-4 concepts. "
44
+ "Word budget multiplier: 1.0."
45
+ ),
46
+ "advanced": (
47
+ "The student knows OOP but makes design-level mistakes. "
48
+ "Focus on Tier 3-6: contracts, concurrency, edge cases. "
49
+ "Word budget multiplier: 0.8."
50
+ ),
51
+ }
52
+
53
+ DIFFICULTY_MULTIPLIERS: dict[str, float] = {
54
+ "beginner": 1.3,
55
+ "intermediate": 1.0,
56
+ "advanced": 0.8,
57
+ }
58
+
59
+ # Source/summarise token limits live in tutor/llm_config.toml (limits section)
60
+
61
+ # Versioning
62
+ PROMPT_VERSION = "v1"
63
+ MAX_UNITS = 8
64
+ MIN_UNITS = 3
65
+ DEFAULT_DURATION_MIN = 20
66
+ DEFAULT_DIFFICULTY = "beginner"
67
+ DEFAULT_FORMAT = "tutor-student"
68
+ DEFAULT_SUBJECT = "java"
69
+
70
+ # Code-to-speech substitutions (pattern, replacement)
71
+ CODE_SUBSTITUTIONS = [
72
+ (r"List<String>", "a List of Strings"),
73
+ (r"HashMap<(\w+),\s*(\w+)>", r"a HashMap from \1 to \2"),
74
+ (r"!=", "not equal to"),
75
+ (r"(?<![=!<>])==(?![=])", "double equals"),
76
+ (r"\.equals\(", "dot equals("),
77
+ (r"@(\w+)", r"\1 annotation"),
78
+ (r"(\w+)\[\]", r"\1 array"),
79
+ (r"NullPointerException", "Null Pointer Exception"),
80
+ (r"StackOverflowError", "Stack Overflow Error"),
81
+ (r"IllegalArgumentException", "Illegal Argument Exception"),
82
+ ]
tutor/exceptions.py ADDED
@@ -0,0 +1,26 @@
1
class TutorError(Exception):
    """Root of the package's exception hierarchy; catch this to catch them all."""


class IngestionError(TutorError):
    """Signals a failure while parsing or chunking a source document."""


class LLMError(TutorError):
    """Signals a failed LLM call or a response that could not be parsed."""


class TTSError(TutorError):
    """Signals a failure while rendering audio."""


class PlayerError(TutorError):
    """Signals that the interactive player reached a state it cannot recover from."""


class ConfigError(TutorError):
    """Signals missing required configuration, such as an API key or ffmpeg."""


class VideoError(TutorError):
    """Signals a failure in any step of the video pipeline."""
File without changes
@@ -0,0 +1,81 @@
1
+ from tutor.audio import sanitizer
2
+ from tutor.models import DialogueLine, TeachingUnit
3
+
4
+
5
def assemble(
    units: list[TeachingUnit],
    all_lines: list[list[DialogueLine]],
    fmt: str,
    doc_title: str,
    mode: str = "conversation",
) -> list[DialogueLine]:
    """Stitch intro, per-unit dialogue and outro into one sanitised script.

    Units and their line lists are paired positionally; surplus entries on
    either side are ignored.  In "conversation" mode a MAYA hand-over line
    announcing the next concept follows every unit except the last one in
    *units*.  Every line's ``text`` -- including the lines passed in via
    *all_lines* -- is finally rewritten in place by ``sanitizer.apply``.

    *fmt* is accepted but not used by this function.
    """
    script: list[DialogueLine] = list(_build_intro(units, doc_title, mode))

    bridging = mode == "conversation"
    last_index = len(units) - 1
    for index, (unit, unit_lines) in enumerate(zip(units, all_lines, strict=False)):
        script += unit_lines
        if bridging and index < last_index:
            # index < last_index guarantees that units[index + 1] exists.
            upcoming = units[index + 1].concept
            handover = DialogueLine(
                speaker="MAYA",
                text=f"Alright, let's move on to the next one: {upcoming}.",
                unit_number=unit.unit,
            )
            script.append(handover)

    script += _build_outro(units, doc_title, mode)

    for entry in script:
        entry.text = sanitizer.apply(entry.text)

    return script
34
+
35
+
36
def _build_intro(units: list[TeachingUnit], doc_title: str, mode: str) -> list[DialogueLine]:
    """Return the opening lines of the script, all tagged with unit number 0.

    "explain" mode yields a single ALEX line; every other mode yields an
    ALEX line that lists the concepts followed by a MAYA line.
    """
    count = len(units)
    plural = "" if count == 1 else "s"

    if mode == "explain":
        walkthrough = (
            f"Let's walk through {doc_title}. "
            f"I'll cover {count} section{plural} from top to bottom, "
            f"following the document as you read along."
        )
        return [DialogueLine(speaker="ALEX", text=walkthrough, unit_number=0)]

    concept_list = ", ".join(u.concept for u in units)
    welcome = (
        f"Welcome. In this session we're covering {doc_title}. "
        f"We'll walk through {count} concept{plural} step by step: {concept_list}."
    )
    promise = (
        "We'll explain each one clearly, with analogies, so by the end you'll have "
        "a solid picture of how it all fits together. Let's get into it."
    )
    return [
        DialogueLine(speaker="ALEX", text=welcome, unit_number=0),
        DialogueLine(speaker="MAYA", text=promise, unit_number=0),
    ]
58
+
59
+
60
def _build_outro(units: list[TeachingUnit], doc_title: str, mode: str) -> list[DialogueLine]:
    """Return the closing lines of the script, all tagged with unit number -1.

    "explain" mode yields a single ALEX line.  Every other mode yields an
    ALEX recap followed by a MAYA line.  The recap ends with the units'
    memory hooks when there are any; with no hooks that sentence is left out
    entirely rather than being spoken as an empty "Here's what to hold
    onto: .".  Each hook is terminated exactly once, so a hook that already
    ends in ".", "!" or "?" no longer produces doubled punctuation.
    """
    count = len(units)
    plural = "" if count == 1 else "s"

    if mode == "explain":
        text = (
            f"That covers all {count} section{plural} of {doc_title}. "
            f"You can replay any section with the replay command, or ask a question with ask."
        )
        return [DialogueLine(speaker="ALEX", text=text, unit_number=-1)]

    # Keep only non-blank hooks and make sure each one ends a sentence.
    hook_sentences = []
    for u in units:
        hook = (u.memory_hook or "").strip()
        if not hook:
            continue
        hook_sentences.append(hook if hook.endswith((".", "!", "?")) else hook + ".")

    recap_parts = [
        f"That's everything for {doc_title}.",
        f"We covered {count} concept{plural} today.",
    ]
    if hook_sentences:
        recap_parts.append("Here's what to hold onto: " + " ".join(hook_sentences))
    alex_text = " ".join(recap_parts)

    maya_text = (
        "If any of those didn't fully click, replay that unit and let it settle. "
        "These are the ideas that show up constantly in real Java code."
    )
    return [
        DialogueLine(speaker="ALEX", text=alex_text, unit_number=-1),
        DialogueLine(speaker="MAYA", text=maya_text, unit_number=-1),
    ]
@@ -0,0 +1,97 @@
1
+ import logging
2
+
3
+ from tutor.constants import (
4
+ DIFFICULTY_CONTEXT,
5
+ DIFFICULTY_MULTIPLIERS,
6
+ OVERHEAD_WORDS,
7
+ WORDS_PER_COMPLEXITY,
8
+ WPM,
9
+ )
10
+ from tutor.exceptions import LLMError
11
+ from tutor.infra.llm import LLMFn, load_prompt, parse_json_response
12
+ from tutor.models import Chunk, DocProfile, TeachingUnit
13
+
14
+ log = logging.getLogger(__name__)
15
+
16
+
17
def plan(
    chunks: list[Chunk],
    profile: DocProfile,
    duration_min: int,
    llm_fn: LLMFn,
    difficulty: str = "beginner",
    topic: str | None = None,
) -> list[TeachingUnit]:
    """Ask the LLM for a curriculum and turn it into budgeted teaching units.

    The chunk summaries are formatted into the ``curriculum.txt`` prompt and
    sent through *llm_fn*.  If the reply cannot be parsed as JSON the model
    is asked once more for raw JSON only; a second parse failure propagates.

    Word budgets: ``duration_min * WPM - OVERHEAD_WORDS`` words are shared
    out in proportion to each unit's complexity, scaled by the difficulty
    multiplier, with a floor of ``WORDS_PER_COMPLEXITY[1]`` per unit.
    Complexity is coerced and clamped to 1..3 once and that same value is
    used both for the proportional share and for the unit itself, so an
    out-of-range or non-numeric value from the model cannot skew the other
    units' budgets or crash the planner.

    Args:
        chunks: Summarised document chunks; ``chunk_id`` and ``summary`` are read.
        profile: Document profile; its ``filepath`` is used as the prompt's title.
        duration_min: Target session length in minutes.
        llm_fn: Callable taking the message list and a ``call_type`` keyword.
        difficulty: Key into the difficulty tables; an unknown key falls back
            to the "beginner" context and a multiplier of 1.0.
        topic: Optional topic the curriculum is instructed to include.

    Returns:
        One ``TeachingUnit`` per item returned by the model, numbered from 1.

    Raises:
        LLMError: if the reply is still unparseable after the retry, or the
            parsed value is not a non-empty list.
    """
    summaries = "\n".join(f"[{c.chunk_id}] {c.summary}" for c in chunks)
    difficulty_context = DIFFICULTY_CONTEXT.get(difficulty, DIFFICULTY_CONTEXT["beginner"])

    prompt = load_prompt("curriculum.txt").format(
        doc_title=profile.filepath,
        duration_min=duration_min,
        difficulty=difficulty,
        difficulty_context=difficulty_context,
        summaries=summaries,
    )

    if topic:
        topic_instruction = (
            f'IMPORTANT: You must include a unit that covers the topic "{topic}". '
            "If the source document does not mention it, create a unit that acknowledges "
            "it is out of scope but explains why it matters in relation to what was covered."
        )
        prompt = topic_instruction + "\n\n" + prompt

    messages = [{"role": "user", "content": prompt}]
    log.info("Planning curriculum for %d chunks, %d min target", len(chunks), duration_min)

    raw = llm_fn(messages, call_type="curriculum")
    try:
        data = parse_json_response(raw)
    except LLMError:
        # One retry: show the model its own reply and ask for bare JSON.
        retry_messages = messages + [
            {"role": "assistant", "content": raw},
            {
                "role": "user",
                "content": "Your previous response could not be parsed as JSON. Reply with the raw JSON array only, no other text.",
            },
        ]
        raw = llm_fn(retry_messages, call_type="curriculum")
        data = parse_json_response(raw)

    if not isinstance(data, list) or len(data) == 0:
        raise LLMError("Curriculum planner returned no units")

    # Clamp once and reuse: the normaliser and the per-unit share must agree.
    complexities = [_coerce_complexity(u.get("complexity", 2)) for u in data]

    total_budget = duration_min * WPM - OVERHEAD_WORDS
    total_complexity = sum(complexities)  # >= len(data) >= 1, never zero
    multiplier = DIFFICULTY_MULTIPLIERS.get(difficulty, 1.0)
    base = total_budget / total_complexity

    units: list[TeachingUnit] = []
    for i, (u, complexity) in enumerate(zip(data, complexities)):
        word_budget = max(
            round(base * complexity * multiplier),
            WORDS_PER_COMPLEXITY[1],  # floor: min 200 words even for advanced
        )
        units.append(
            TeachingUnit(
                unit=i + 1,
                concept=u.get("concept", f"Unit {i + 1}"),
                source_sections=u.get("source_sections", []),
                complexity=complexity,
                word_budget=word_budget,
                key_facts=u.get("key_facts", []),
                common_misconception=u.get("common_misconception", ""),
                good_analogy=u.get("good_analogy", ""),
                question_style=u.get("question_style", "recall"),
                memory_hook=u.get("memory_hook", ""),
                prerequisite_concepts=u.get("prerequisite_concepts", []),
                js_contrast=u.get("js_contrast", ""),
                production_relevance=u.get("production_relevance", ""),
            )
        )

    log.info("Curriculum planned: %d units", len(units))
    return units


def _coerce_complexity(raw: object) -> int:
    """Return *raw* as an int clamped to 1..3, or 2 when it is not a usable number."""
    try:
        value = int(raw)
    except (TypeError, ValueError, OverflowError):
        value = 2  # same default the planner uses for a missing complexity
    return max(1, min(3, value))