learnx-cli 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- learnx_cli-0.3.0.dist-info/METADATA +240 -0
- learnx_cli-0.3.0.dist-info/RECORD +131 -0
- learnx_cli-0.3.0.dist-info/WHEEL +4 -0
- learnx_cli-0.3.0.dist-info/entry_points.txt +2 -0
- tutor/.env copy.example +4 -0
- tutor/__init__.py +0 -0
- tutor/__main__.py +4 -0
- tutor/assets/__init__.py +5 -0
- tutor/assets/html/fonts/Inter-Bold.woff2 +0 -0
- tutor/assets/html/fonts/Inter-Regular.woff2 +0 -0
- tutor/assets/html/fonts/Inter-SemiBold.woff2 +0 -0
- tutor/assets/html/fonts/JetBrainsMono-Regular.woff2 +0 -0
- tutor/assets/html/highlight-java.min.js +2 -0
- tutor/assets/html/highlight-javascript.min.js +2 -0
- tutor/assets/html/highlight-python.min.js +2 -0
- tutor/assets/html/highlight.min.js +17 -0
- tutor/assets/html/mermaid.min.js +31 -0
- tutor/assets/html/slide_base.css +464 -0
- tutor/assets/html/theme-learnx-dark.css +12 -0
- tutor/audio/__init__.py +0 -0
- tutor/audio/audio_builder.py +143 -0
- tutor/audio/sanitizer.py +9 -0
- tutor/audio/tts_renderer.py +54 -0
- tutor/cli/__init__.py +0 -0
- tutor/cli/commands.py +391 -0
- tutor/cli/logo.py +21 -0
- tutor/cli/playback_commands.py +239 -0
- tutor/cli/shell.py +91 -0
- tutor/cli/shell_context.py +18 -0
- tutor/cli/theme.py +39 -0
- tutor/cli/video_commands.py +123 -0
- tutor/config.py +122 -0
- tutor/conftest.py +5 -0
- tutor/constants.py +82 -0
- tutor/exceptions.py +26 -0
- tutor/generation/__init__.py +0 -0
- tutor/generation/assembler.py +81 -0
- tutor/generation/curriculum.py +97 -0
- tutor/generation/dialogue.py +172 -0
- tutor/generation/narrator.py +122 -0
- tutor/generation/segment_parser.py +223 -0
- tutor/generation/segment_planner.py +200 -0
- tutor/generation/visual_planner.py +205 -0
- tutor/infra/__init__.py +0 -0
- tutor/infra/llm.py +152 -0
- tutor/ingestion/__init__.py +0 -0
- tutor/ingestion/chunker.py +171 -0
- tutor/ingestion/doc_analyzer.py +41 -0
- tutor/ingestion/parse_content.py +19 -0
- tutor/ingestion/summarizer.py +51 -0
- tutor/inspector.py +117 -0
- tutor/llm_config.toml +58 -0
- tutor/models.py +147 -0
- tutor/player/__init__.py +0 -0
- tutor/player/input_handler.py +45 -0
- tutor/player/player.py +308 -0
- tutor/player/player_display.py +117 -0
- tutor/prompts/curriculum.txt +67 -0
- tutor/prompts/dialogue.txt +62 -0
- tutor/prompts/narrate.txt +34 -0
- tutor/prompts/qa.txt +17 -0
- tutor/prompts/summarize.txt +9 -0
- tutor/prompts/visual.txt +60 -0
- tutor/prompts/visual_v3.txt +91 -0
- tutor/qa/__init__.py +0 -0
- tutor/qa/qa.py +105 -0
- tutor/requirements-dev.txt +2 -0
- tutor/requirements.txt +12 -0
- tutor/sample_docs/headingless_large.md +1 -0
- tutor/sample_docs/headingless_test.md +1 -0
- tutor/sample_docs/java-basics.md +78 -0
- tutor/tests/__init__.py +0 -0
- tutor/tests/audio/__init__.py +0 -0
- tutor/tests/audio/test_audio_builder.py +106 -0
- tutor/tests/audio/test_sanitizer.py +41 -0
- tutor/tests/cli/__init__.py +0 -0
- tutor/tests/cli/test_commands.py +67 -0
- tutor/tests/cli/test_video_commands.py +190 -0
- tutor/tests/e2e/README.md +61 -0
- tutor/tests/e2e/__init__.py +0 -0
- tutor/tests/e2e/conftest.py +117 -0
- tutor/tests/e2e/fixtures/README.md +17 -0
- tutor/tests/e2e/fixtures/sample.md +13 -0
- tutor/tests/e2e/test_audio_quality.py +40 -0
- tutor/tests/e2e/test_av_sync.py +56 -0
- tutor/tests/e2e/test_pipeline_smoke.py +37 -0
- tutor/tests/e2e/test_slide_render.py +72 -0
- tutor/tests/e2e/test_video_streams.py +104 -0
- tutor/tests/generation/__init__.py +0 -0
- tutor/tests/generation/conftest.py +134 -0
- tutor/tests/generation/test_assembler.py +64 -0
- tutor/tests/generation/test_curriculum.py +107 -0
- tutor/tests/generation/test_narrator.py +165 -0
- tutor/tests/generation/test_segment_edge_cases.py +280 -0
- tutor/tests/generation/test_segment_planner.py +324 -0
- tutor/tests/generation/test_visual_planner.py +319 -0
- tutor/tests/ingestion/__init__.py +0 -0
- tutor/tests/ingestion/test_chunker.py +94 -0
- tutor/tests/ingestion/test_doc_analyzer.py +51 -0
- tutor/tests/player/__init__.py +0 -0
- tutor/tests/player/test_player_states.py +88 -0
- tutor/tests/test_assets.py +39 -0
- tutor/tests/test_models_visual.py +180 -0
- tutor/tests/visual/__init__.py +0 -0
- tutor/tests/visual/test_beat_timer.py +321 -0
- tutor/tests/visual/test_pipeline_integration.py +178 -0
- tutor/tests/visual/test_slide_renderer.py +298 -0
- tutor/tests/visual/test_subtitle_writer.py +165 -0
- tutor/tests/visual/test_video_assembler.py +108 -0
- tutor/tests/visual/test_visual_pipeline.py +270 -0
- tutor/tutor.py +365 -0
- tutor/visual/__init__.py +213 -0
- tutor/visual/beat_timer.py +222 -0
- tutor/visual/slide_renderer.py +236 -0
- tutor/visual/subtitle_writer.py +187 -0
- tutor/visual/templates/_base.html.j2 +40 -0
- tutor/visual/templates/analogy.html.j2 +21 -0
- tutor/visual/templates/callout.html.j2 +10 -0
- tutor/visual/templates/code_example.html.j2 +12 -0
- tutor/visual/templates/comparison.html.j2 +28 -0
- tutor/visual/templates/decision_guide.html.j2 +37 -0
- tutor/visual/templates/definition.html.j2 +13 -0
- tutor/visual/templates/diagram.html.j2 +11 -0
- tutor/visual/templates/hook_question.html.j2 +17 -0
- tutor/visual/templates/key_insight.html.j2 +9 -0
- tutor/visual/templates/memory_hook.html.j2 +7 -0
- tutor/visual/templates/outro.html.j2 +16 -0
- tutor/visual/templates/question_prompt.html.j2 +13 -0
- tutor/visual/templates/step_sequence.html.j2 +14 -0
- tutor/visual/templates/title_card.html.j2 +12 -0
- tutor/visual/video_assembler.py +299 -0
tutor/cli/shell.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
|
|
3
|
+
from tutor.cli import theme
|
|
4
|
+
from tutor.cli.commands import COMMAND_MAP, ShellContext
|
|
5
|
+
from tutor.cli.logo import print_welcome
|
|
6
|
+
from tutor.cli.video_commands import cmd_video, cmd_vsessions
|
|
7
|
+
|
|
8
|
+
# Register the video-pipeline handlers in the shared command table so the
# dispatch loop in run_shell() can route "/video" and "/vsessions" to them.
COMMAND_MAP.update(
    {
        "/video": cmd_video,
        "/vsessions": cmd_vsessions,
    }
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _build_prompt(ctx: ShellContext) -> str:
    """Return the prompt string that reflects the current player state.

    With no player, or a player whose ``_state`` is ``"STOPPED"``, the plain
    ``LearnX > `` prompt is returned. Otherwise the prompt carries a state
    icon and, while ``_current_idx`` is below ``len(units)``, the 1-based
    unit position plus the first 22 characters of that unit's concept.
    """
    brand = f"{theme.CYAN}LearnX{theme.RESET}"

    player = ctx.player
    if player is None or player._state == "STOPPED":
        return f"{brand} > "

    state_icons = {"PLAYING": "▶", "PAUSED": "⏸", "ASKING": "?", "ANSWERING": "⟳"}
    icon = state_icons.get(player._state, "·")  # "·" for any unlisted state

    position = player._current_idx
    total = len(player.units)
    if position >= total:
        # Index is past the last unit: show the state icon only.
        return f"{brand} [{icon}] > "

    concept = player.units[position].concept[:22]
    return f"{brand} [{icon} {position + 1}/{total} {concept}] > "
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def run_shell() -> None:
    """Run the interactive shell loop until the user exits.

    Sets up UTF-8 output, primes ffmpeg, prints the welcome banner, then
    reads lines from the prompt. Ctrl-C / EOF at the prompt, or a command
    whose ``COMMAND_MAP`` entry is ``None``, triggers ``_graceful_exit`` and
    ends the loop. Text that does not start with "/" is forwarded to
    ``cmd_ask`` while a player session is active.
    """
    _setup_utf8()
    _prime_ffmpeg()
    print_welcome()
    ctx = ShellContext()

    while True:
        try:
            line = input(_build_prompt(ctx)).strip()
        except (KeyboardInterrupt, EOFError):
            # Finish the interrupted prompt line before shutting down.
            print()
            _graceful_exit(ctx)
            break

        if not line:
            continue

        # ``line`` is non-empty after strip(), so split() yields at least one word.
        words = line.split()

        # Bare text while a session is active → route as /ask
        is_command = line.startswith("/")
        if not is_command and ctx.player and ctx.player._state not in ("STOPPED", None):
            from tutor.cli.commands import cmd_ask

            cmd_ask(words, ctx)
            continue

        cmd = words[0].lower()
        tokens = words[1:]

        if cmd in COMMAND_MAP:
            handler = COMMAND_MAP[cmd]
            if handler is None:
                # A None entry marks the exit command.
                _graceful_exit(ctx)
                break
            handler(tokens, ctx)
        else:
            print(theme.yellow(f" Unknown command: {cmd} — type /help for a list."))
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _graceful_exit(ctx: ShellContext) -> None:
    """Stop an active player, wait briefly for its thread, and say goodbye.

    ``_quit()`` is called only when a player exists and its ``_state`` is
    not ``"STOPPED"``. The player thread, if any, is joined for at most two
    seconds so shell exit cannot block indefinitely on it.
    """
    player = ctx.player
    if player and player._state != "STOPPED":
        player._quit()

    worker = ctx.player_thread
    if worker:
        worker.join(timeout=2.0)

    print(theme.dim(" Goodbye!\n"))
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _prime_ffmpeg() -> None:
|
|
74
|
+
"""Inject ffmpeg into PATH before pydub is imported so its warning never fires."""
|
|
75
|
+
try:
|
|
76
|
+
from tutor.config import _check_ffmpeg
|
|
77
|
+
|
|
78
|
+
_check_ffmpeg()
|
|
79
|
+
except Exception:
|
|
80
|
+
pass
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _setup_utf8() -> None:
|
|
84
|
+
if hasattr(sys.stdout, "buffer"):
|
|
85
|
+
import io
|
|
86
|
+
|
|
87
|
+
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
|
|
88
|
+
if hasattr(sys.stderr, "buffer"):
|
|
89
|
+
import io
|
|
90
|
+
|
|
91
|
+
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace")
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import threading
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from tutor.player.player import TutorPlayer
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class ShellContext:
    """Mutable state shared by the shell loop and its command handlers.

    Every field defaults to ``None``; handlers fill them in as the user
    works through a session.
    """

    # Active player instance, or None when nothing has been loaded.
    player: TutorPlayer | None = None
    # Thread associated with the player; the shell joins it on exit.
    player_thread: threading.Thread | None = None
    # NOTE(review): presumably the units directory of the last played session — confirm against commands.py.
    last_units_dir: Path | None = None
    # Session name used as the default target when /video is given no argument.
    current_session: str | None = None
    # Result of the most recent video pipeline run.
    last_video: Path | None = None
|
tutor/cli/theme.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
# Enable ANSI escape codes on Windows 10+
# NOTE(review): the empty os.system("") call is presumably the usual trick to
# switch the console into virtual-terminal mode — confirm on target Windows versions.
if sys.platform == "win32":
    os.system("")

# Text attributes: reset everything, bold, dim.
RESET = "\033[0m"
BOLD = "\033[1m"
DIM = "\033[2m"

# Foreground colours (SGR codes 91, 92, 93 and 96).
RED = "\033[91m"
GREEN = "\033[92m"
YELLOW = "\033[93m"
CYAN = "\033[96m"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def red(s: str) -> str:
    """Return *s* wrapped in the RED escape code and followed by RESET."""
    return "{}{}{}".format(RED, s, RESET)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def green(s: str) -> str:
    """Return *s* wrapped in the GREEN escape code and followed by RESET."""
    return "{}{}{}".format(GREEN, s, RESET)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def yellow(s: str) -> str:
    """Return *s* wrapped in the YELLOW escape code and followed by RESET."""
    return "{}{}{}".format(YELLOW, s, RESET)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def cyan(s: str) -> str:
    """Return *s* wrapped in the CYAN escape code and followed by RESET."""
    return "{}{}{}".format(CYAN, s, RESET)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def bold(s: str) -> str:
    """Return *s* wrapped in the BOLD escape code and followed by RESET."""
    return "{}{}{}".format(BOLD, s, RESET)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def dim(s: str) -> str:
    """Return *s* wrapped in the DIM escape code and followed by RESET."""
    return "{}{}{}".format(DIM, s, RESET)
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Shell command handlers for the video pipeline.
|
|
3
|
+
Separate from commands.py so the audio pipeline file stays under 400 lines.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import logging
|
|
7
|
+
from functools import partial
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from tutor.cli import theme
|
|
11
|
+
from tutor.cli.commands import AUDIO_DIR, ShellContext
|
|
12
|
+
|
|
13
|
+
# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Root directory for generated videos. The path is relative, so it resolves
# against the process's current working directory.
VIDEO_DIR = Path("video")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def cmd_video(tokens: list[str], ctx: ShellContext) -> None:
    """Usage: /video [session-name]
    Generate MP4 video for a session. Requires /generate to have run first."""
    # NOTE(review): docstring text kept verbatim — its "Usage:" shape suggests
    # it may be displayed as help text; confirm before rewording.

    # Session name: explicit argument first, otherwise the shell's current session.
    if tokens:
        session = tokens[0]
    elif ctx.current_session:
        session = ctx.current_session
    else:
        print(theme.red(" Usage: /video <session-name>"))
        return

    # The audio pipeline must have produced its outputs before video can be built.
    audio_session_dir = AUDIO_DIR / session
    try:
        _assert_audio_ready(audio_session_dir)
    except Exception as exc:
        print(theme.red(f" Error: {exc}"))
        return

    # Ask before replacing an existing render.
    target = VIDEO_DIR / session / "full_session.mp4"
    if target.exists() and not _confirm_overwrite(target):
        print(theme.dim(" Skipped."))
        return

    try:
        _run_video_pipeline(session, ctx)
    except KeyboardInterrupt:
        print(theme.yellow("\n Cancelled."))
    except Exception as exc:
        # Report to the user and keep the traceback in the log; the shell stays alive.
        print(theme.red(f"\n Error: {exc}\n"))
        log.exception("Video pipeline failed for session %s", session)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def cmd_vsessions(tokens: list[str], ctx: ShellContext) -> None:
    """Usage: /vsessions — list sessions that have a completed video."""
    # ``tokens`` and ``ctx`` are unused; they are part of the common handler signature.
    if not VIDEO_DIR.exists():
        print(theme.dim(" No video sessions yet. Use /video <session> to create one."))
        return

    # A session counts as completed only when its full_session.mp4 exists.
    sessions = sorted(
        d for d in VIDEO_DIR.iterdir() if d.is_dir() and (d / "full_session.mp4").exists()
    )
    if not sessions:
        print(theme.dim(" No completed videos yet. Use /video <session-name>."))
        return

    print()
    for s in sessions:
        mp4 = s / "full_session.mp4"
        size_mb = mp4.stat().st_size / 1_048_576  # bytes -> MiB
        # Pad the plain name BEFORE colouring it. Padding the coloured string
        # counts the invisible escape characters toward the 30-column width
        # and leaves the column under-padded.
        name_col = theme.cyan(f"{s.name:<30}")
        print(f" {name_col} {theme.green('[mp4]')} {size_mb:.0f} MB")
    print(theme.dim("\n Play with your video player: vlc video/<session>/full_session.mp4"))
    print()
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _run_video_pipeline(session: str, ctx: ShellContext) -> None:
    """Resolve paths and run the full visual pipeline for *session*.

    Loads the config, binds the LLM chat function to the configured
    provider, creates ``VIDEO_DIR/<session>`` if needed, and calls
    ``run_visual_pipeline``. On success the pipeline result is stored in
    ``ctx.last_video`` and *session* becomes ``ctx.current_session``.
    Exceptions from the pipeline propagate to the caller.
    """
    # Imported lazily so these modules are only loaded when a video is requested.
    from tutor.config import load_config
    from tutor.infra import llm as _llm
    from tutor.visual import run_visual_pipeline

    config = load_config()
    # Use the provider recorded on the config instead of repeating the literal;
    # fall back to "groq" if the attribute is missing or empty.
    provider = getattr(config, "default_provider", None) or "groq"
    llm_fn = partial(_llm.chat, provider=provider, config=config)

    video_dir = VIDEO_DIR / session
    video_dir.mkdir(parents=True, exist_ok=True)

    audio_dir = AUDIO_DIR / session
    # Only the count is used here, for the progress message.
    units = list((audio_dir / "tutorial_units").glob("unit_*.mp3"))
    print(f"\n Resolving session {theme.bold(session)}...")
    print(f" Found {len(units)} units. Starting visual pipeline.")

    result = run_visual_pipeline(session, audio_dir, video_dir, llm_fn, difficulty="beginner")
    ctx.last_video = result
    ctx.current_session = session
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _assert_audio_ready(audio_session_dir: Path) -> None:
|
|
97
|
+
"""Raise ValueError if the audio session is not ready."""
|
|
98
|
+
if not audio_session_dir.exists():
|
|
99
|
+
raise ValueError(
|
|
100
|
+
f"Session '{audio_session_dir.name}' not found in {AUDIO_DIR}/.\n"
|
|
101
|
+
" Run /generate first to produce audio."
|
|
102
|
+
)
|
|
103
|
+
units_json = audio_session_dir / "tutorial.units.json"
|
|
104
|
+
if not units_json.exists():
|
|
105
|
+
raise ValueError(
|
|
106
|
+
f"tutorial.units.json not found in {audio_session_dir}.\n"
|
|
107
|
+
" The audio pipeline must complete before running /video."
|
|
108
|
+
)
|
|
109
|
+
mp3s = list((audio_session_dir / "tutorial_units").glob("unit_*.mp3"))
|
|
110
|
+
if not mp3s:
|
|
111
|
+
raise ValueError(
|
|
112
|
+
f"No MP3 files found in {audio_session_dir / 'tutorial_units'}.\n"
|
|
113
|
+
" Run /generate (without --script-only) to produce audio."
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _confirm_overwrite(mp4_path: Path) -> bool:
|
|
118
|
+
"""Prompt if full_session.mp4 already exists. Returns True to proceed."""
|
|
119
|
+
try:
|
|
120
|
+
answer = input(" Session already has a video. Regenerate? [y/N]: ").strip().lower()
|
|
121
|
+
return answer == "y"
|
|
122
|
+
except (EOFError, KeyboardInterrupt):
|
|
123
|
+
return False
|
tutor/config.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import subprocess
|
|
3
|
+
import sys
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from dotenv import load_dotenv
|
|
8
|
+
|
|
9
|
+
from tutor.exceptions import ConfigError
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class Config:
    """Runtime settings for the tutor CLI.

    ``load_config`` fills the two API keys from environment variables and
    leaves ``default_provider`` at its class default.
    """

    # Groq API key; empty string when not configured.
    groq_api_key: str = ""
    # OpenRouter API key; empty string when not configured.
    openrouter_api_key: str = ""
    # Name of the LLM provider to use when none is given explicitly.
    default_provider: str = "groq"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def load_config() -> Config:
    """Build a ``Config`` from the environment.

    Loads ``.env`` from the directory containing this module, then reads
    ``GROQ_API_KEY`` and ``OPENROUTER_API_KEY``. A missing variable becomes
    an empty string.
    """
    env_file = Path(__file__).parent / ".env"
    load_dotenv(env_file)

    groq_key = os.getenv("GROQ_API_KEY", "")
    openrouter_key = os.getenv("OPENROUTER_API_KEY", "")
    return Config(groq_api_key=groq_key, openrouter_api_key=openrouter_key)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def preflight(input_path: str, provider: str, mode: str) -> Config:
    """Validate inputs and environment before a run and return the loaded config.

    Checks are made in this order:

    1. an input path is given, unless *mode* is "inspect", "dry-run" or
       "script-only";
    2. the input file, if given, exists and has a ``.md`` suffix;
    3. the API key for *provider* ("groq" or "openrouter") is set;
    4. outside those three modes, the input file's parent directory is
       writable;
    5. for mode "generate", ffmpeg is available.

    Raises:
        ConfigError: on the first check that fails.
    """
    config = load_config()

    lightweight_modes = ("inspect", "dry-run", "script-only")
    is_full_run = mode not in lightweight_modes

    if is_full_run and input_path is None:
        raise ConfigError(
            "No input file specified.\n Usage: python tutor.py <input.md> [options]"
        )

    if input_path is not None:
        source = Path(input_path)
        if not source.exists():
            raise ConfigError(
                f"Input file not found: {input_path}\n Check the path and try again."
            )
        if source.suffix.lower() != ".md":
            raise ConfigError(
                f"Input file must be a .md file, got: {source.suffix}\n Only Markdown files are supported."
            )

    if provider == "groq" and not config.groq_api_key:
        raise ConfigError(
            "GROQ_API_KEY not set.\n"
            " Add it to tutor/.env: GROQ_API_KEY=gsk_...\n"
            " Get a free key at: console.groq.com"
        )

    if provider == "openrouter" and not config.openrouter_api_key:
        raise ConfigError(
            "OPENROUTER_API_KEY not set.\n"
            " Add it to tutor/.env: OPENROUTER_API_KEY=sk-or-...\n"
            " Sign up at: openrouter.ai"
        )

    if is_full_run and input_path:
        # Writability is checked on the directory that holds the input file.
        out_parent = Path(input_path).parent
        if not os.access(out_parent, os.W_OK):
            raise ConfigError(
                f"Output directory is not writable: {out_parent}\n"
                " Check permissions or specify a different --output path."
            )

    if mode == "generate":
        _check_ffmpeg()

    return config
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _check_ffmpeg() -> None:
    """Ensure an ffmpeg executable is usable, or raise ``ConfigError``.

    First runs ``ffmpeg -version`` through PATH. If that fails on Windows,
    a list of common install locations is probed and the first hit is
    added to PATH via ``_inject_ffmpeg``.

    Raises:
        ConfigError: ffmpeg could not be run from PATH and, on Windows, was
            not found in any probed location.
    """
    try:
        subprocess.run(
            ["ffmpeg", "-version"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )
        return
    except (OSError, subprocess.CalledProcessError):
        # OSError covers FileNotFoundError and also PermissionError or
        # NotADirectoryError from a broken PATH entry. All of them mean
        # "ffmpeg is not usable from PATH", so continue to the fallback.
        pass

    # PATH doesn't have ffmpeg — probe common Windows install layouts.
    if sys.platform == "win32":
        _winget_pkgs = Path.home() / "AppData/Local/Microsoft/WinGet/Packages"
        candidates: list[Path] = [
            Path("C:/ffmpeg/bin/ffmpeg.exe"),
            *Path("C:/ffmpeg").glob("*/bin/ffmpeg.exe"),
            Path("C:/Program Files/ffmpeg/bin/ffmpeg.exe"),
            *Path("C:/Program Files/ffmpeg").glob("*/bin/ffmpeg.exe"),
            Path("C:/tools/ffmpeg/bin/ffmpeg.exe"),
            *(_winget_pkgs.glob("Gyan.FFmpeg*/*/bin/ffmpeg.exe") if _winget_pkgs.exists() else []),
            *(_winget_pkgs.glob("*/ffmpeg*/bin/ffmpeg.exe") if _winget_pkgs.exists() else []),
        ]
        for candidate in candidates:
            if candidate.exists():
                # First match wins; its directory is prepended to PATH.
                _inject_ffmpeg(candidate.parent)
                return

    raise ConfigError(
        "ffmpeg not found in PATH.\n"
        " Install with: winget install ffmpeg\n"
        " Or add its bin\\ folder to your system PATH and restart the terminal."
    )
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _inject_ffmpeg(bin_dir: Path) -> None:
|
|
112
|
+
"""Add a discovered ffmpeg directory to the process PATH so pydub finds it."""
|
|
113
|
+
os.environ["PATH"] = str(bin_dir) + os.pathsep + os.environ.get("PATH", "")
|
|
114
|
+
# Patch pydub's runtime converter path if pydub is already imported.
|
|
115
|
+
try:
|
|
116
|
+
import pydub
|
|
117
|
+
|
|
118
|
+
pydub.AudioSegment.converter = str(bin_dir / "ffmpeg.exe")
|
|
119
|
+
pydub.AudioSegment.ffmpeg = str(bin_dir / "ffmpeg.exe")
|
|
120
|
+
pydub.AudioSegment.ffprobe = str(bin_dir / "ffprobe.exe")
|
|
121
|
+
except Exception:
|
|
122
|
+
pass
|
tutor/conftest.py
ADDED
tutor/constants.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# Audio
# Speaking rate in words per minute; the curriculum planner multiplies it by
# the target duration to get a word budget.
WPM = 130
# Silence lengths — presumably milliseconds, per the _MS suffix (confirm in audio_builder).
SILENCE_BREATH_MS = 150
SILENCE_TURN_MS = 500
SILENCE_UNIT_MS = 1200
SILENCE_SESSION_MS = 800
# NOTE(review): presumably the cap on concurrent TTS requests — confirm in tts_renderer.
TTS_SEMAPHORE_LIMIT = 8

# Voices
# Voice identifiers and speaking-rate adjustments, one pair per speaker role.
VOICE_TUTOR = "en-US-GuyNeural"
VOICE_STUDENT = "en-US-JennyNeural"
VOICE_COTUTOR = "en-US-SaraNeural"
RATE_TUTOR = "+0%"
RATE_STUDENT = "+5%"
RATE_COTUTOR = "+0%"

# Ingestion
# Token thresholds and sizes for the ingestion strategies (A/B/C) and chunking.
STRATEGY_A_TOKEN_LIMIT = 6_000
STRATEGY_B_TOKEN_LIMIT = 60_000
MAX_CHUNK_TOKENS = 4_000
MIN_CHUNK_TOKENS = 50
SUMMARY_CACHE_DIR = ".tutor_cache"
STRATEGY_C_WINDOW_TOKENS = 2_000
STRATEGY_C_OVERLAP_TOKENS = 200

# Complexity
# Words per complexity level (1-3); the level-1 value is also the planner's
# per-unit minimum word budget.
WORDS_PER_COMPLEXITY: dict[int, int] = {1: 200, 2: 380, 3: 580}
OVERHEAD_WORDS = 200  # intro + transitions + outro

# Player
PLAYER_POLL_HZ = 10
PLAYER_BAR_WIDTH = 40

# Difficulty
# Prompt text per difficulty level. The multipliers quoted in these sentences
# duplicate DIFFICULTY_MULTIPLIERS below — keep the two in sync.
DIFFICULTY_CONTEXT: dict[str, str] = {
    "beginner": (
        "The student has never written Java before. "
        "Prioritise Tier 0-2 concepts. Analogies are mandatory. "
        "Set max complexity to 2. Word budget multiplier: 1.3."
    ),
    "intermediate": (
        "The student has written Java for 3 months. "
        "Assume JVM basics are known. Use Tier 1-4 concepts. "
        "Word budget multiplier: 1.0."
    ),
    "advanced": (
        "The student knows OOP but makes design-level mistakes. "
        "Focus on Tier 3-6: contracts, concurrency, edge cases. "
        "Word budget multiplier: 0.8."
    ),
}

# Numeric word-budget multipliers applied by the curriculum planner.
DIFFICULTY_MULTIPLIERS: dict[str, float] = {
    "beginner": 1.3,
    "intermediate": 1.0,
    "advanced": 0.8,
}

# Source/summarise token limits live in tutor/llm_config.toml (limits section)

# Versioning
PROMPT_VERSION = "v1"
MAX_UNITS = 8
MIN_UNITS = 3
DEFAULT_DURATION_MIN = 20
DEFAULT_DIFFICULTY = "beginner"
DEFAULT_FORMAT = "tutor-student"
DEFAULT_SUBJECT = "java"

# Code-to-speech substitutions (pattern, replacement)
# Each pattern is a regular expression; replacements may use back-references (\1, \2).
# NOTE(review): order presumably matters if applied sequentially — confirm in the sanitizer.
CODE_SUBSTITUTIONS = [
    (r"List<String>", "a List of Strings"),
    (r"HashMap<(\w+),\s*(\w+)>", r"a HashMap from \1 to \2"),
    (r"!=", "not equal to"),
    (r"(?<![=!<>])==(?![=])", "double equals"),
    (r"\.equals\(", "dot equals("),
    (r"@(\w+)", r"\1 annotation"),
    (r"(\w+)\[\]", r"\1 array"),
    (r"NullPointerException", "Null Pointer Exception"),
    (r"StackOverflowError", "Stack Overflow Error"),
    (r"IllegalArgumentException", "Illegal Argument Exception"),
]
|
tutor/exceptions.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Package exception hierarchy: every error below derives from TutorError, so
# callers can catch that one base class to handle any tutor-specific failure.
class TutorError(Exception):
    """Base for all tutor AI errors."""


class IngestionError(TutorError):
    """Raised when doc parsing or chunking fails."""


class LLMError(TutorError):
    """Raised when an LLM call fails or returns unparseable output."""


class TTSError(TutorError):
    """Raised when audio rendering fails."""


class PlayerError(TutorError):
    """Raised when the interactive player encounters an unrecoverable state."""


class ConfigError(TutorError):
    """Raised when required config (API key, ffmpeg) is missing."""


class VideoError(TutorError):
    """Raised when any step of the video pipeline fails."""
|
|
File without changes
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
from tutor.audio import sanitizer
|
|
2
|
+
from tutor.models import DialogueLine, TeachingUnit
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def assemble(
    units: list[TeachingUnit],
    all_lines: list[list[DialogueLine]],
    fmt: str,
    doc_title: str,
    mode: str = "conversation",
) -> list[DialogueLine]:
    """Stitch per-unit dialogue into one script with an intro and an outro.

    Units and their line lists are paired positionally; extra items on
    either side are ignored. In "conversation" mode a MAYA bridging line
    announcing the next concept follows every unit except the last. Finally
    every line's text is passed through ``sanitizer.apply``.

    Note that the ``DialogueLine`` objects from *all_lines* are reused, so
    their ``text`` is rewritten in place. *fmt* is accepted but not used by
    this function.
    """
    script: list[DialogueLine] = list(_build_intro(units, doc_title, mode))

    add_bridges = mode == "conversation"
    last_index = len(units) - 1
    for idx, (unit, unit_lines) in enumerate(zip(units, all_lines, strict=False)):
        script += unit_lines
        if add_bridges and idx < last_index:
            # idx < last_index guarantees units[idx + 1] exists.
            upcoming = units[idx + 1].concept
            bridge = DialogueLine(
                speaker="MAYA",
                text=f"Alright, let's move on to the next one: {upcoming}.",
                unit_number=unit.unit,
            )
            script.append(bridge)

    script += _build_outro(units, doc_title, mode)

    for entry in script:
        entry.text = sanitizer.apply(entry.text)

    return script
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _build_intro(units: list[TeachingUnit], doc_title: str, mode: str) -> list[DialogueLine]:
    """Return the opening lines of the session (``unit_number=0``).

    "explain" mode yields one ALEX line announcing how many sections will be
    covered. Any other mode yields an ALEX line listing every concept,
    followed by a fixed MAYA line.
    """
    count = len(units)
    plural = "s" if count != 1 else ""

    if mode == "explain":
        text = (
            f"Let's walk through {doc_title}. "
            f"I'll cover {count} section{plural} from top to bottom, "
            "following the document as you read along."
        )
        return [DialogueLine(speaker="ALEX", text=text, unit_number=0)]

    concept_list = ", ".join(unit.concept for unit in units)
    alex_text = (
        f"Welcome. In this session we're covering {doc_title}. "
        f"We'll walk through {count} concept{plural} step by step: {concept_list}."
    )
    maya_text = (
        "We'll explain each one clearly, with analogies, so by the end you'll have "
        "a solid picture of how it all fits together. Let's get into it."
    )
    return [
        DialogueLine(speaker="ALEX", text=alex_text, unit_number=0),
        DialogueLine(speaker="MAYA", text=maya_text, unit_number=0),
    ]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _build_outro(units: list[TeachingUnit], doc_title: str, mode: str) -> list[DialogueLine]:
    """Return the closing lines of the session (``unit_number=-1``).

    "explain" mode yields one ALEX line. Any other mode yields an ALEX recap
    followed by a fixed MAYA line. The recap includes the units' memory
    hooks only when at least one non-empty hook exists, so the script never
    ends with a dangling "Here's what to hold onto: .". Trailing periods on
    individual hooks are dropped before joining so the result has no "..".
    """
    count = len(units)
    plural = "s" if count != 1 else ""

    if mode == "explain":
        text = (
            f"That covers all {count} section{plural} of {doc_title}. "
            "You can replay any section with the replay command, or ask a question with ask."
        )
        return [DialogueLine(speaker="ALEX", text=text, unit_number=-1)]

    # Normalise hooks: trim whitespace and trailing periods, drop empties.
    hooks: list[str] = []
    for unit in units:
        hook = (unit.memory_hook or "").strip().rstrip(".").rstrip()
        if hook:
            hooks.append(hook)

    alex_text = f"That's everything for {doc_title}. We covered {count} concept{plural} today."
    if hooks:
        joined = ". ".join(hooks)
        alex_text += f" Here's what to hold onto: {joined}."

    maya_text = (
        "If any of those didn't fully click, replay that unit and let it settle. "
        "These are the ideas that show up constantly in real Java code."
    )
    return [
        DialogueLine(speaker="ALEX", text=alex_text, unit_number=-1),
        DialogueLine(speaker="MAYA", text=maya_text, unit_number=-1),
    ]
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from tutor.constants import (
|
|
4
|
+
DIFFICULTY_CONTEXT,
|
|
5
|
+
DIFFICULTY_MULTIPLIERS,
|
|
6
|
+
OVERHEAD_WORDS,
|
|
7
|
+
WORDS_PER_COMPLEXITY,
|
|
8
|
+
WPM,
|
|
9
|
+
)
|
|
10
|
+
from tutor.exceptions import LLMError
|
|
11
|
+
from tutor.infra.llm import LLMFn, load_prompt, parse_json_response
|
|
12
|
+
from tutor.models import Chunk, DocProfile, TeachingUnit
|
|
13
|
+
|
|
14
|
+
log = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def plan(
    chunks: list[Chunk],
    profile: DocProfile,
    duration_min: int,
    llm_fn: LLMFn,
    difficulty: str = "beginner",
    topic: str | None = None,
) -> list[TeachingUnit]:
    """Ask the LLM for a curriculum and turn its answer into teaching units.

    The prompt is built from the chunk summaries and the difficulty context
    (unknown difficulties fall back to "beginner"). When *topic* is given,
    an instruction to cover it is prepended. If the first response cannot
    be parsed as JSON, the model is asked once more for raw JSON only.

    Word budgets share ``duration_min * WPM - OVERHEAD_WORDS`` across units
    in proportion to each unit's complexity, scaled by the difficulty
    multiplier, with a floor of ``WORDS_PER_COMPLEXITY[1]`` words per unit.
    Complexity is clamped to 1..3 once and that clamped value is used for
    both the proportion and the stored unit, so the two always agree.

    Raises:
        LLMError: the retry response is still unparseable, or the parsed
            result is not a non-empty list.
    """
    summaries = "\n".join(f"[{c.chunk_id}] {c.summary}" for c in chunks)
    difficulty_context = DIFFICULTY_CONTEXT.get(difficulty, DIFFICULTY_CONTEXT["beginner"])

    prompt = load_prompt("curriculum.txt").format(
        doc_title=profile.filepath,
        duration_min=duration_min,
        difficulty=difficulty,
        difficulty_context=difficulty_context,
        summaries=summaries,
    )

    if topic:
        topic_instruction = (
            f'IMPORTANT: You must include a unit that covers the topic "{topic}". '
            "If the source document does not mention it, create a unit that acknowledges "
            "it is out of scope but explains why it matters in relation to what was covered."
        )
        prompt = topic_instruction + "\n\n" + prompt

    messages = [{"role": "user", "content": prompt}]
    log.info("Planning curriculum for %d chunks, %d min target", len(chunks), duration_min)

    raw = llm_fn(messages, call_type="curriculum")
    try:
        data = parse_json_response(raw)
    except LLMError:
        # One retry: show the model its own reply and ask for raw JSON only.
        retry_messages = messages + [
            {"role": "assistant", "content": raw},
            {
                "role": "user",
                "content": "Your previous response could not be parsed as JSON. Reply with the raw JSON array only, no other text.",
            },
        ]
        raw = llm_fn(retry_messages, call_type="curriculum")
        data = parse_json_response(raw)

    if not isinstance(data, list) or len(data) == 0:
        raise LLMError("Curriculum planner returned no units")

    # Clamp once, up front. Every value is >= 1 and data is non-empty, so the
    # total is always positive and needs no zero guard.
    complexities = [_clamp_complexity(u.get("complexity", 2)) for u in data]
    total_complexity = sum(complexities)

    total_budget = duration_min * WPM - OVERHEAD_WORDS
    multiplier = DIFFICULTY_MULTIPLIERS.get(difficulty, 1.0)
    base = total_budget / total_complexity

    units: list[TeachingUnit] = []
    for i, (u, complexity) in enumerate(zip(data, complexities)):
        word_budget = max(
            round(base * complexity * multiplier),
            WORDS_PER_COMPLEXITY[1],  # floor: min 200 words even for advanced
        )
        units.append(
            TeachingUnit(
                unit=i + 1,
                concept=u.get("concept", f"Unit {i + 1}"),
                source_sections=u.get("source_sections", []),
                complexity=complexity,
                word_budget=word_budget,
                key_facts=u.get("key_facts", []),
                common_misconception=u.get("common_misconception", ""),
                good_analogy=u.get("good_analogy", ""),
                question_style=u.get("question_style", "recall"),
                memory_hook=u.get("memory_hook", ""),
                prerequisite_concepts=u.get("prerequisite_concepts", []),
                js_contrast=u.get("js_contrast", ""),
                production_relevance=u.get("production_relevance", ""),
            )
        )

    log.info("Curriculum planned: %d units", len(units))
    return units


def _clamp_complexity(value: object) -> int:
    """Coerce an LLM-supplied complexity to an int in 1..3 (default 2 if unusable)."""
    try:
        number = int(value)  # type: ignore[call-overload]
    except (TypeError, ValueError):
        number = 2
    return max(1, min(3, number))
|