learnx-cli 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. learnx_cli-0.3.0.dist-info/METADATA +240 -0
  2. learnx_cli-0.3.0.dist-info/RECORD +131 -0
  3. learnx_cli-0.3.0.dist-info/WHEEL +4 -0
  4. learnx_cli-0.3.0.dist-info/entry_points.txt +2 -0
  5. tutor/.env copy.example +4 -0
  6. tutor/__init__.py +0 -0
  7. tutor/__main__.py +4 -0
  8. tutor/assets/__init__.py +5 -0
  9. tutor/assets/html/fonts/Inter-Bold.woff2 +0 -0
  10. tutor/assets/html/fonts/Inter-Regular.woff2 +0 -0
  11. tutor/assets/html/fonts/Inter-SemiBold.woff2 +0 -0
  12. tutor/assets/html/fonts/JetBrainsMono-Regular.woff2 +0 -0
  13. tutor/assets/html/highlight-java.min.js +2 -0
  14. tutor/assets/html/highlight-javascript.min.js +2 -0
  15. tutor/assets/html/highlight-python.min.js +2 -0
  16. tutor/assets/html/highlight.min.js +17 -0
  17. tutor/assets/html/mermaid.min.js +31 -0
  18. tutor/assets/html/slide_base.css +464 -0
  19. tutor/assets/html/theme-learnx-dark.css +12 -0
  20. tutor/audio/__init__.py +0 -0
  21. tutor/audio/audio_builder.py +143 -0
  22. tutor/audio/sanitizer.py +9 -0
  23. tutor/audio/tts_renderer.py +54 -0
  24. tutor/cli/__init__.py +0 -0
  25. tutor/cli/commands.py +391 -0
  26. tutor/cli/logo.py +21 -0
  27. tutor/cli/playback_commands.py +239 -0
  28. tutor/cli/shell.py +91 -0
  29. tutor/cli/shell_context.py +18 -0
  30. tutor/cli/theme.py +39 -0
  31. tutor/cli/video_commands.py +123 -0
  32. tutor/config.py +122 -0
  33. tutor/conftest.py +5 -0
  34. tutor/constants.py +82 -0
  35. tutor/exceptions.py +26 -0
  36. tutor/generation/__init__.py +0 -0
  37. tutor/generation/assembler.py +81 -0
  38. tutor/generation/curriculum.py +97 -0
  39. tutor/generation/dialogue.py +172 -0
  40. tutor/generation/narrator.py +122 -0
  41. tutor/generation/segment_parser.py +223 -0
  42. tutor/generation/segment_planner.py +200 -0
  43. tutor/generation/visual_planner.py +205 -0
  44. tutor/infra/__init__.py +0 -0
  45. tutor/infra/llm.py +152 -0
  46. tutor/ingestion/__init__.py +0 -0
  47. tutor/ingestion/chunker.py +171 -0
  48. tutor/ingestion/doc_analyzer.py +41 -0
  49. tutor/ingestion/parse_content.py +19 -0
  50. tutor/ingestion/summarizer.py +51 -0
  51. tutor/inspector.py +117 -0
  52. tutor/llm_config.toml +58 -0
  53. tutor/models.py +147 -0
  54. tutor/player/__init__.py +0 -0
  55. tutor/player/input_handler.py +45 -0
  56. tutor/player/player.py +308 -0
  57. tutor/player/player_display.py +117 -0
  58. tutor/prompts/curriculum.txt +67 -0
  59. tutor/prompts/dialogue.txt +62 -0
  60. tutor/prompts/narrate.txt +34 -0
  61. tutor/prompts/qa.txt +17 -0
  62. tutor/prompts/summarize.txt +9 -0
  63. tutor/prompts/visual.txt +60 -0
  64. tutor/prompts/visual_v3.txt +91 -0
  65. tutor/qa/__init__.py +0 -0
  66. tutor/qa/qa.py +105 -0
  67. tutor/requirements-dev.txt +2 -0
  68. tutor/requirements.txt +12 -0
  69. tutor/sample_docs/headingless_large.md +1 -0
  70. tutor/sample_docs/headingless_test.md +1 -0
  71. tutor/sample_docs/java-basics.md +78 -0
  72. tutor/tests/__init__.py +0 -0
  73. tutor/tests/audio/__init__.py +0 -0
  74. tutor/tests/audio/test_audio_builder.py +106 -0
  75. tutor/tests/audio/test_sanitizer.py +41 -0
  76. tutor/tests/cli/__init__.py +0 -0
  77. tutor/tests/cli/test_commands.py +67 -0
  78. tutor/tests/cli/test_video_commands.py +190 -0
  79. tutor/tests/e2e/README.md +61 -0
  80. tutor/tests/e2e/__init__.py +0 -0
  81. tutor/tests/e2e/conftest.py +117 -0
  82. tutor/tests/e2e/fixtures/README.md +17 -0
  83. tutor/tests/e2e/fixtures/sample.md +13 -0
  84. tutor/tests/e2e/test_audio_quality.py +40 -0
  85. tutor/tests/e2e/test_av_sync.py +56 -0
  86. tutor/tests/e2e/test_pipeline_smoke.py +37 -0
  87. tutor/tests/e2e/test_slide_render.py +72 -0
  88. tutor/tests/e2e/test_video_streams.py +104 -0
  89. tutor/tests/generation/__init__.py +0 -0
  90. tutor/tests/generation/conftest.py +134 -0
  91. tutor/tests/generation/test_assembler.py +64 -0
  92. tutor/tests/generation/test_curriculum.py +107 -0
  93. tutor/tests/generation/test_narrator.py +165 -0
  94. tutor/tests/generation/test_segment_edge_cases.py +280 -0
  95. tutor/tests/generation/test_segment_planner.py +324 -0
  96. tutor/tests/generation/test_visual_planner.py +319 -0
  97. tutor/tests/ingestion/__init__.py +0 -0
  98. tutor/tests/ingestion/test_chunker.py +94 -0
  99. tutor/tests/ingestion/test_doc_analyzer.py +51 -0
  100. tutor/tests/player/__init__.py +0 -0
  101. tutor/tests/player/test_player_states.py +88 -0
  102. tutor/tests/test_assets.py +39 -0
  103. tutor/tests/test_models_visual.py +180 -0
  104. tutor/tests/visual/__init__.py +0 -0
  105. tutor/tests/visual/test_beat_timer.py +321 -0
  106. tutor/tests/visual/test_pipeline_integration.py +178 -0
  107. tutor/tests/visual/test_slide_renderer.py +298 -0
  108. tutor/tests/visual/test_subtitle_writer.py +165 -0
  109. tutor/tests/visual/test_video_assembler.py +108 -0
  110. tutor/tests/visual/test_visual_pipeline.py +270 -0
  111. tutor/tutor.py +365 -0
  112. tutor/visual/__init__.py +213 -0
  113. tutor/visual/beat_timer.py +222 -0
  114. tutor/visual/slide_renderer.py +236 -0
  115. tutor/visual/subtitle_writer.py +187 -0
  116. tutor/visual/templates/_base.html.j2 +40 -0
  117. tutor/visual/templates/analogy.html.j2 +21 -0
  118. tutor/visual/templates/callout.html.j2 +10 -0
  119. tutor/visual/templates/code_example.html.j2 +12 -0
  120. tutor/visual/templates/comparison.html.j2 +28 -0
  121. tutor/visual/templates/decision_guide.html.j2 +37 -0
  122. tutor/visual/templates/definition.html.j2 +13 -0
  123. tutor/visual/templates/diagram.html.j2 +11 -0
  124. tutor/visual/templates/hook_question.html.j2 +17 -0
  125. tutor/visual/templates/key_insight.html.j2 +9 -0
  126. tutor/visual/templates/memory_hook.html.j2 +7 -0
  127. tutor/visual/templates/outro.html.j2 +16 -0
  128. tutor/visual/templates/question_prompt.html.j2 +13 -0
  129. tutor/visual/templates/step_sequence.html.j2 +14 -0
  130. tutor/visual/templates/title_card.html.j2 +12 -0
  131. tutor/visual/video_assembler.py +299 -0
@@ -0,0 +1,171 @@
1
+ import logging
2
+ import re
3
+
4
+ from tutor.constants import (
5
+ MAX_CHUNK_TOKENS,
6
+ MIN_CHUNK_TOKENS,
7
+ STRATEGY_C_OVERLAP_TOKENS,
8
+ STRATEGY_C_WINDOW_TOKENS,
9
+ )
10
+ from tutor.ingestion import parse_content
11
+ from tutor.models import Chunk, DocProfile
12
+
13
+ log = logging.getLogger(__name__)
14
+
15
+
16
def chunk(text: str, profile: DocProfile) -> list[Chunk]:
    """Split *text* into chunks using the strategy selected in *profile*.

    Strategy "A" keeps the document whole, "B" splits on headings, and any
    other value falls through to the sliding-window strategy "C".  The raw
    chunks are then post-processed by the quality rules (small-chunk merge
    plus enrichment).
    """
    dispatch = {"A": _strategy_a, "B": _strategy_b}
    splitter = dispatch.get(profile.strategy, _strategy_c)
    return _apply_quality_rules(splitter(text))
24
+
25
+
26
+ def _slugify(heading: str) -> str:
27
+ return re.sub(r"[^a-z0-9]+", "_", heading.lower()).strip("_")
28
+
29
+
30
+ def _estimate_tokens(text: str) -> int:
31
+ return int(len(text.split()) * 1.3)
32
+
33
+
34
def _strategy_a(text: str) -> list[Chunk]:
    """Strategy A: the document is small enough to stay as one single chunk."""
    whole_doc = Chunk(
        chunk_id="full_doc",
        breadcrumb="Full Document",
        heading="Full Document",
        level=0,
        token_count=_estimate_tokens(text),
        text=text,
    )
    return [whole_doc]
45
+
46
+
47
def _strategy_b(text: str) -> list[Chunk]:
    """Strategy B: one chunk (or set of sub-chunks) per top-level ``## `` section.

    Falls back to the sliding-window strategy when the document contains fewer
    than two sections, since there is nothing meaningful to split on.
    """
    raw_sections = re.split(r"\n(?=## )", text)
    sections = [sec for sec in raw_sections if sec.strip()]

    if len(sections) < 2:
        log.warning(
            "Document has no headings — falling back to Strategy C (sliding window). "
            "Consider adding ## headings to improve chunk quality."
        )
        return _strategy_c(text)

    out: list[Chunk] = []
    for sec in sections:
        first_line = sec.strip().split("\n")[0]
        heading = first_line.lstrip("#").strip()
        out.extend(_split_section(sec, heading, parent_heading=None))
    return out
65
+
66
+
67
def _section_chunk(
    section: str, heading: str, parent_heading: str | None, token_count: int
) -> Chunk:
    """Build one level-2 chunk for *section*, prefixing the parent heading
    (when present) so the chunk text keeps its context."""
    prefix = f"## {parent_heading}\n\n" if parent_heading else ""
    return Chunk(
        chunk_id=_slugify(heading),
        breadcrumb=f"{parent_heading} > {heading}" if parent_heading else heading,
        heading=heading,
        level=2,
        token_count=token_count,
        text=prefix + section,
    )


def _split_section(section: str, heading: str, parent_heading: str | None) -> list[Chunk]:
    """Return chunks for one ``##`` section, splitting on ``###`` subsections
    only when the section exceeds MAX_CHUNK_TOKENS.

    The two "keep whole" branches previously duplicated the Chunk construction
    and disagreed on the breadcrumb (the no-subsections branch dropped the
    parent heading while still prefixing it into the text); both now go through
    _section_chunk.  The only visible caller passes parent_heading=None, so
    observable output is unchanged.
    """
    token_count = _estimate_tokens(section)

    # Small enough: keep the section whole.
    if token_count <= MAX_CHUNK_TOKENS:
        return [_section_chunk(section, heading, parent_heading, token_count)]

    # Oversized: try to split on ### subsections; if there are none, the
    # section stays whole (a later quality warning flags oversized chunks).
    subsections = re.split(r"\n(?=### )", section)
    if len(subsections) < 2:
        return [_section_chunk(section, heading, parent_heading, token_count)]

    result: list[Chunk] = []
    for sub in subsections:
        sub_heading = sub.strip().split("\n")[0].lstrip("#").strip()
        result.append(
            Chunk(
                chunk_id=_slugify(f"{heading}_{sub_heading}"),
                breadcrumb=f"{heading} > {sub_heading}",
                heading=sub_heading,
                level=3,
                token_count=_estimate_tokens(sub),
                # Keep the parent ## heading in the text for context.
                text=f"## {heading}\n\n" + sub,
            )
        )
    return result
113
+
114
+
115
def _strategy_c(text: str) -> list[Chunk]:
    """Strategy C: sliding word window with overlap, for heading-less documents.

    Window and overlap sizes are derived from the token constants using the
    same ~1.3 tokens/word heuristic as _estimate_tokens.  Non-final windows
    are trimmed back to the last sentence boundary (". ") when that keeps
    more than half the window.
    """
    word_window = int(STRATEGY_C_WINDOW_TOKENS / 1.3)
    word_overlap = int(STRATEGY_C_OVERLAP_TOKENS / 1.3)

    words = text.split()
    chunks: list[Chunk] = []
    start = 0
    idx = 0

    while start < len(words):
        end = min(start + word_window, len(words))
        window_words = words[start:end]

        # Trim non-final windows back to the last sentence boundary, but only
        # when the boundary falls in the second half of the window.
        if end < len(words):
            window_text = " ".join(window_words)
            last_period = window_text.rfind(". ")
            if last_period > len(window_text) // 2:
                window_text = window_text[: last_period + 1]
                window_words = window_text.split()

        chunk_text = " ".join(window_words)
        token_count = int(len(window_words) * 1.3)

        chunks.append(
            Chunk(
                chunk_id=f"window_{idx:03d}",
                breadcrumb=f"Window {idx + 1}",
                heading=f"Window {idx + 1}",
                level=0,
                token_count=token_count,
                text=chunk_text,
                has_code=False,
                overlapping=(idx > 0),
            )
        )

        idx += 1
        # BUGFIX: advance relative to the words actually emitted, not the full
        # window size.  Previously the step was always word_window - word_overlap,
        # so when a sentence-boundary trim removed more words than the overlap,
        # the trimmed tail never appeared in any chunk.  max(..., 1) still
        # guarantees forward progress.
        step = len(window_words) - word_overlap
        start += max(step, 1)

    return chunks
156
+
157
+
158
def _apply_quality_rules(chunks: list[Chunk]) -> list[Chunk]:
    """Merge undersized chunks into their predecessor, then enrich every chunk.

    A chunk below MIN_CHUNK_TOKENS is appended to the previous chunk's text
    (with its token count re-estimated); a leading undersized chunk has no
    predecessor and is kept as-is.
    """
    merged: list[Chunk] = []
    for current in chunks:
        too_small = current.token_count < MIN_CHUNK_TOKENS
        if too_small and merged:
            previous = merged[-1]
            previous.text = previous.text + "\n\n" + current.text
            previous.token_count = _estimate_tokens(previous.text)
            continue
        merged.append(current)

    for current in merged:
        parse_content.enrich(current)

    return merged
@@ -0,0 +1,41 @@
1
+ import re
2
+ from pathlib import Path
3
+ from typing import Literal
4
+
5
+ from tutor.constants import STRATEGY_A_TOKEN_LIMIT, STRATEGY_B_TOKEN_LIMIT
6
+ from tutor.exceptions import IngestionError
7
+ from tutor.models import DocProfile
8
+
9
+
10
def analyze(filepath: str) -> DocProfile:
    """Profile the document at *filepath* and select a chunking strategy.

    The strategy is chosen purely by estimated token count against the A/B
    limits; anything larger gets the sliding-window strategy "C".

    Raises:
        IngestionError: if the file cannot be read.
    """
    path = Path(filepath)
    try:
        text = path.read_text(encoding="utf-8")
    except OSError as e:
        raise IngestionError(f"Cannot read file: {filepath}") from e

    estimated_tokens = int(len(text.split()) * 1.3)

    strategy: Literal["A", "B", "C"] = "C"
    if estimated_tokens <= STRATEGY_A_TOKEN_LIMIT:
        strategy = "A"
    elif estimated_tokens <= STRATEGY_B_TOKEN_LIMIT:
        strategy = "B"

    return DocProfile(
        filepath=filepath,
        raw_bytes=path.stat().st_size,
        estimated_tokens=estimated_tokens,
        # Count markdown headings up to ### depth.
        section_count=len(re.findall(r"^#{1,3}\s", text, re.MULTILINE)),
        strategy=strategy,
        has_code_blocks="```" in text,
        language_hint="java" if "```java" in text.lower() else "general",
    )
@@ -0,0 +1,19 @@
1
+ import re
2
+
3
+ from tutor.models import Chunk
4
+
5
+
6
def enrich(chunk: Chunk) -> Chunk:
    """Populate derived fields on *chunk* in place and return it.

    Sets ``has_code`` (a ``` fence anywhere in the text) and ``key_terms``:
    every **bold** or `inline code` span, deduplicated in first-seen order.
    """
    chunk.has_code = "```" in chunk.text

    matches = re.findall(r"\*\*(.+?)\*\*|`(.+?)`", chunk.text)
    terms = (bold or code for bold, code in matches)
    # dict.fromkeys keeps insertion order while dropping duplicates.
    chunk.key_terms = list(dict.fromkeys(t for t in terms if t))
    return chunk
@@ -0,0 +1,51 @@
1
+ import hashlib
2
+ import logging
3
+ from pathlib import Path
4
+
5
+ from tutor.constants import PROMPT_VERSION, SUMMARY_CACHE_DIR
6
+ from tutor.infra.llm import LIMITS, LLMFn, load_prompt
7
+ from tutor.models import Chunk
8
+
9
+ log = logging.getLogger(__name__)
10
+
11
+
12
def summarize_all(
    chunks: list[Chunk],
    llm_fn: LLMFn,
    cache_dir: str = SUMMARY_CACHE_DIR,
) -> list[Chunk]:
    """Fill in ``summary`` on every chunk, using a content-addressed disk cache.

    The Strategy-A whole-document chunk is summarised by plain truncation;
    every other chunk is sent through *llm_fn* unless a cached summary for
    the same text and prompt version already exists.  Returns the same list
    with summaries set in place.
    """
    cache_path = Path(cache_dir)
    cache_path.mkdir(parents=True, exist_ok=True)

    prompt_text = load_prompt("summarize.txt")

    for c in chunks:
        # The single-chunk case never needs an LLM round-trip.
        if c.chunk_id == "full_doc":
            c.summary = c.text[:500]
            continue

        # Cache key covers chunk text AND prompt version, so editing the
        # prompt file invalidates stale summaries.
        cache_key = hashlib.md5((c.text + PROMPT_VERSION).encode()).hexdigest()
        cache_file = cache_path / f"{cache_key}.summary.txt"

        if cache_file.exists():
            c.summary = cache_file.read_text(encoding="utf-8")
            log.debug("Cache hit for chunk %s", c.chunk_id)
            continue

        log.info("Summarizing chunk %s (%d tokens)", c.chunk_id, c.token_count)
        # Keep the request under the provider's input budget.
        chunk_text = _truncate_to_tokens(c.text, LIMITS["max_summarize_input_tokens"])
        messages = [
            {"role": "system", "content": prompt_text},
            {"role": "user", "content": chunk_text},
        ]
        summary = llm_fn(messages, call_type="summarize")
        # Write the cache before moving on so a later crash still reuses it.
        cache_file.write_text(summary, encoding="utf-8")
        c.summary = summary

    return chunks
46
+
47
+
48
+ def _truncate_to_tokens(text: str, max_tokens: int) -> str:
49
+ max_words = int(max_tokens / 1.3)
50
+ words = text.split()
51
+ return " ".join(words[:max_words]) if len(words) > max_words else text
tutor/inspector.py ADDED
@@ -0,0 +1,117 @@
1
+ from tutor.constants import WPM
2
+ from tutor.models import Chunk, DocProfile, TeachingUnit
3
+
4
+
5
def report_ingestion(profile: DocProfile, chunks: list[Chunk]) -> None:
    """Print a human-readable ingestion report to stdout: profile stats,
    aggregate chunk metrics, a per-chunk table, and quality/orphan warnings."""
    print("\n=== Ingestion Report ===")
    print(f"File: {profile.filepath}")
    print(f"Raw size: {profile.raw_bytes:,} bytes")
    print(f"Estimated tokens: {profile.estimated_tokens:,}")
    print(f"Strategy: {profile.strategy}")
    print(f"Sections found: {profile.section_count}")
    print(f"Chunks created: {len(chunks)}")

    if chunks:
        # Aggregate stats across all chunks (guarded: len(chunks) > 0 here).
        avg = sum(c.token_count for c in chunks) // len(chunks)
        largest = max(chunks, key=lambda c: c.token_count)
        code_count = sum(1 for c in chunks if c.has_code)
        print(f" Avg chunk size: {avg} tokens")
        print(f" Largest chunk: {largest.token_count} tokens ({largest.chunk_id})")
        print(f" Chunks with code: {code_count}/{len(chunks)}")

    # Fixed-width per-chunk table.
    print("\n=== Chunk Map ===")
    print(f"{'ID':<25} {'Heading':<35} {'Tokens':>7} {'Code'}")
    print("-" * 75)
    for c in chunks:
        code_flag = "yes" if c.has_code else "no"
        print(f"{c.chunk_id:<25} {c.heading:<35} {c.token_count:>7} {code_flag}")

    _report_warnings(chunks)
    _report_orphans(chunks)
31
+
32
+
33
def _report_warnings(chunks: list[Chunk]) -> None:
    """Print quality warnings for chunks that exceed the size ceiling."""
    from tutor.constants import MAX_CHUNK_TOKENS

    warnings: list[str] = []
    for c in chunks:
        if c.token_count <= MAX_CHUNK_TOKENS:
            continue
        if c.has_code:
            # Oversized because a code fence was kept intact — expected.
            warnings.append(
                f"! {c.chunk_id} — code block preserved intact at {c.token_count} tokens (correct behavior)."
            )
        else:
            warnings.append(
                f"! {c.chunk_id} — {c.token_count} tokens, may produce shallow dialogue."
            )

    if not warnings:
        return
    print("\n=== Chunk Quality Warnings ===")
    for w in warnings:
        print(w)
51
+
52
+
53
def _report_orphans(chunks: list[Chunk]) -> None:
    """Flag very small chunks (under 200 tokens) that the planner may skip."""
    tiny = [c for c in chunks if c.token_count < 200]
    if not tiny:
        return
    print("\n=== Orphan Risk ===")
    for c in tiny:
        print(
            f" {c.chunk_id} ({c.token_count} tokens) — small section, may be skipped by planner"
        )
61
+
62
+
63
def report_curriculum(
    units: list[TeachingUnit],
    chunks: list[Chunk],
    duration_min: int,
) -> None:
    """Print the duration plan, the per-unit timing table, and the source
    coverage report for a generated curriculum.

    Timing converts word budgets to seconds at WPM words per minute; a fixed
    80-second silence overhead is folded into the total.
    """
    print("\n=== Duration Plan ===")
    print(f"Target duration: {duration_min} min")
    print(f"Word budget: {duration_min * WPM} words (@ {WPM} WPM)")
    print("Silence overhead: ~1m 20s")

    print("\n=== Teaching Units ===")
    header = f"{'':45} {'Complexity':>10} {'Words':>7} {'Est. time'}"
    print(header)
    print("-" * 80)

    # Fixed word allowances for the intro/outro bookends.
    intro_words = 100
    intro_secs = intro_words * 60 // WPM
    print(f"{'Intro':<45} {'—':>10} {intro_words:>7} {_fmt_time(intro_secs)}")

    total_words = intro_words
    total_secs = intro_secs

    for u in units:
        # Seconds of narration implied by this unit's word budget.
        secs = u.word_budget * 60 // WPM
        label = f'Unit {u.unit} "{u.concept}"'
        print(f"{label:<45} {u.complexity:>10} {u.word_budget:>7} {_fmt_time(secs)}")
        total_words += u.word_budget
        total_secs += secs

    outro_words = 80
    outro_secs = outro_words * 60 // WPM
    print(f"{'Outro (memory hook recap)':<45} {'—':>10} {outro_words:>7} {_fmt_time(outro_secs)}")
    total_words += outro_words
    total_secs += outro_secs + 80  # silence overhead

    print("-" * 80)
    print(f"{'Total':<45} {'':>10} {total_words:>7} {_fmt_time(total_secs)}")

    # Coverage: which source chunks actually feed a teaching unit.
    used_ids = {sid for u in units for sid in u.source_sections}
    used = sum(1 for c in chunks if c.chunk_id in used_ids)
    pct = used / len(chunks) * 100 if chunks else 0
    skipped = [c.chunk_id for c in chunks if c.chunk_id not in used_ids]

    print("\n=== Coverage ===")
    print(f"Sections used: {used}/{len(chunks)} ({pct:.1f}%)")
    if skipped:
        # Show at most eight skipped ids, then a count of the remainder.
        print(f"Sections skipped: {', '.join(skipped[:8])}")
        if len(skipped) > 8:
            print(f" ... and {len(skipped) - 8} more")
112
+
113
+
114
+ def _fmt_time(seconds: int) -> str:
115
+ m = seconds // 60
116
+ s = seconds % 60
117
+ return f"{m}m {s:02d}s"
tutor/llm_config.toml ADDED
@@ -0,0 +1,58 @@
1
+ # ============================================================
2
+ # LearnX LLM configuration
3
+ # Edit this file to change models or token budgets.
4
+ # No Python code changes required.
5
+ # ============================================================
6
+
7
+ # ── Model selection ─────────────────────────────────────────
8
+ # One model name per call_type per provider.
9
+ # call_types: curriculum | dialogue | summarize | qa
10
+
11
+ [providers.groq]
12
+ curriculum = "llama-3.3-70b-versatile"
13
+ dialogue = "llama-3.3-70b-versatile"
14
+ summarize = "llama-3.1-8b-instant"
15
+ qa = "llama-3.1-8b-instant"
16
+ visual = "llama-3.3-70b-versatile" # 70b needed for reliable DOT output
17
+ segments = "llama-3.3-70b-versatile"
18
+
19
+ [providers.openrouter]
20
+ curriculum = "poolside/laguna-xs.2:free"
21
+ dialogue = "poolside/laguna-xs.2:free"
22
+ summarize = "poolside/laguna-xs.2:free"
23
+ qa = "poolside/laguna-xs.2:free"
24
+ visual = "poolside/laguna-xs.2:free"
25
+ segments = "poolside/laguna-xs.2:free"
26
+
27
+ # ── Response token caps ──────────────────────────────────────
28
+ # Max tokens the model may generate per call type.
29
+ # Together with the input prompt they must stay under the
30
+ # provider's per-request token limit (Groq free tier: 6 000).
31
+
32
+ [max_tokens]
33
+ curriculum = 2000
34
+ dialogue = 2000
35
+ summarize = 400
36
+ qa = 600
37
+ visual = 1200
38
+ segments = 2000
39
+
40
+ # ── Input size limits ────────────────────────────────────────
41
+ # max_source_tokens: source text sent with each dialogue prompt
42
+ # max_summarize_input_tokens: chunk text sent to the summariser
43
+ #
44
+ # dialogue uses llama-3.3-70b-versatile (128k context on Groq) or poolside/laguna-xs.2 (OpenRouter).
45
+ # 8000 source + ~1200 prompt overhead + 2000 response = ~11200 tokens — fine for both.
46
+ # max_summarize_input_tokens kept at 3000; those calls send much smaller chunks.
47
+
48
+ [limits]
49
+ max_source_tokens = 8000
50
+ max_summarize_input_tokens = 3000
51
+ max_visual_source_tokens = 800
52
+
53
+ # ── Call behaviour ───────────────────────────────────────────
54
+
55
+ [llm]
56
+ temperature = 0.7
57
+ retry_count = 3 # attempts before giving up
58
+ retry_delay_s = 2.0 # seconds between retries
tutor/models.py ADDED
@@ -0,0 +1,147 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Literal
5
+
6
+
7
@dataclass
class DocProfile:
    """Ingestion profile of a source document; drives chunking-strategy choice."""

    filepath: str
    raw_bytes: int  # file size on disk
    estimated_tokens: int  # word count * 1.3 heuristic
    strategy: Literal["A", "B", "C"]  # A = whole doc, B = by headings, C = sliding window
    section_count: int  # number of #/##/### markdown headings found
    has_code_blocks: bool  # any ``` fence present
    language_hint: str  # e.g. "java" or "general"
16
+
17
+
18
@dataclass
class Chunk:
    """One slice of the source document, produced by the chunker."""

    chunk_id: str  # slug, "window_NNN", or "full_doc"
    breadcrumb: str  # heading path, e.g. "Parent > Child"
    heading: str
    level: int  # 0 = whole doc / window, 2 = ## section, 3 = ### subsection
    token_count: int  # estimated (word count * 1.3)
    text: str
    has_code: bool = False  # set by parse_content.enrich (``` fence present)
    summary: str = ""  # filled in by the summarizer
    overlapping: bool = False  # True for non-first sliding-window chunks
    key_terms: list[str] = field(default_factory=list)  # **bold** / `code` spans
30
+
31
+
32
@dataclass
class TeachingUnit:
    """A single concept to teach, as planned by the curriculum generator."""

    unit: int  # unit number (intro is 0, units start at 1 — see DialogueLine)
    concept: str
    source_sections: list[str]  # chunk_ids this unit draws from
    complexity: int  # 1 | 2 | 3
    word_budget: int  # narration words allotted to this unit
    key_facts: list[str]
    common_misconception: str
    good_analogy: str
    question_style: str
    memory_hook: str
    prerequisite_concepts: list[str] = field(default_factory=list)
    js_contrast: str = ""
    production_relevance: str = ""
47
+
48
+
49
@dataclass
class DialogueLine:
    """One spoken line of the generated tutorial dialogue."""

    speaker: str  # "ALEX" | "MAYA" | "SAM"
    text: str
    unit_number: int  # 0 = intro, 1+ = unit, -1 = outro
54
+
55
+
56
@dataclass
class RenderedSegment:
    """A dialogue line rendered to an audio clip."""

    line: DialogueLine
    audio_path: str  # path to the rendered audio file
    duration_ms: int  # clip length in milliseconds
61
+
62
+
63
@dataclass
class QAExchange:
    """One question/answer exchange recorded in a SessionLog."""

    id: int
    unit_number: int
    unit_concept: str
    position_seconds: int  # playback position when the question was asked — TODO confirm
    question: str
    answer: str
    source_sections: list[str]  # chunk_ids backing the answer — verify against qa module
    timestamp: str
73
+
74
+
75
@dataclass
class SessionLog:
    """Per-session record of Q&A exchanges against one source document."""

    source_file: str
    session_start: str
    format: str
    duration_minutes: int
    exchanges: list[QAExchange] = field(default_factory=list)
82
+
83
+
84
@dataclass
class TimingEntry:
    """Timing of one dialogue line within its unit's rendered audio."""

    line_index: int  # 0-based within the unit
    speaker: str  # "ALEX" | "MAYA" | "SAM"
    text: str  # dialogue line text — for cross-referencing only
    start_ms: int  # offset from unit MP3 start, in milliseconds
    end_ms: int  # exclusive end; end_ms - start_ms == len(audio) in ms
91
+
92
+
93
# Closed set of per-segment slide types; each name matches a template under
# tutor/visual/templates/ (title_card and outro are whole-slide types kept
# separate in VisualSpec.slide_type).
VALID_VISUAL_TYPES: frozenset[str] = frozenset(
    {
        "hook_question",
        "definition",
        "analogy",
        "comparison",
        "code_example",
        "diagram",
        "question_prompt",
        "decision_guide",
        "key_insight",
        "memory_hook",
        "step_sequence",
        "callout",
    }
)
109
+
110
+
111
@dataclass
class SlideSegment:
    """A rendered slide covering a contiguous span of dialogue lines."""

    unit_index: int
    segment_index: int
    lines_start: int  # span of dialogue lines covered; inclusivity convention
    lines_end: int  # set by the segment parser — TODO confirm
    visual_type: str  # expected to be one of VALID_VISUAL_TYPES
    title: str
    body: str | None
    code: str | None
    language: str | None  # syntax-highlight language for `code`, when any
    mermaid: str | None  # mermaid diagram source, when any
    left: str | None  # comparison columns
    right: str | None
    rows: list | None
    png_path: str = ""  # filled in once the slide is rendered to disk
127
+
128
+
129
@dataclass
class VisualSpec:
    """Planner output describing what one slide should show."""

    unit_index: int
    slide_type: str  # "title_card" | "unit" | "outro"
    concept: str = ""
    hook_question: str = ""
    key_points: list[str] = field(default_factory=list)
    code_snippet: str | None = None
    diagram_type: str = "none"
    diagram_spec: str | dict[str, object] | None = None
    memory_hook: str = ""
    analogy: str = ""
    # title_card fields
    title: str = ""
    subtitle: str = ""
    doc_source: str = ""
    # outro fields
    memory_hooks: list[str] = field(default_factory=list)
    session_stats: str = ""
File without changes
@@ -0,0 +1,45 @@
1
+ import logging
2
+ import sys
3
+
4
+ log = logging.getLogger(__name__)
5
+
6
+
7
+ def get_key() -> str | None:
8
+ """Return the pressed key as a string, or None if no key is available."""
9
+ if sys.platform == "win32":
10
+ return _get_key_windows()
11
+ return _get_key_unix()
12
+
13
+
14
+ def _get_key_windows() -> str | None:
15
+ import msvcrt
16
+
17
+ if msvcrt.kbhit():
18
+ raw = msvcrt.getch()
19
+ try:
20
+ return raw.decode("utf-8")
21
+ except UnicodeDecodeError:
22
+ return None
23
+ return None
24
+
25
+
26
+ def _get_key_unix() -> str | None:
27
+ try:
28
+ import threading
29
+
30
+ import readchar
31
+
32
+ result: list[str | None] = [None]
33
+
34
+ def _read() -> None:
35
+ result[0] = readchar.readchar()
36
+
37
+ t = threading.Thread(target=_read, daemon=True)
38
+ t.start()
39
+ t.join(timeout=0.05)
40
+ return result[0]
41
+ except ImportError:
42
+ log.warning(
43
+ "readchar not installed — keyboard input unavailable on non-Windows. pip install readchar"
44
+ )
45
+ return None