notegen 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. notegen-0.1.0/.gitignore +11 -0
  2. notegen-0.1.0/PKG-INFO +91 -0
  3. notegen-0.1.0/README.md +59 -0
  4. notegen-0.1.0/notes_gen/__init__.py +0 -0
  5. notegen-0.1.0/notes_gen/cli.py +166 -0
  6. notegen-0.1.0/notes_gen/config.py +56 -0
  7. notegen-0.1.0/notes_gen/output/__init__.py +0 -0
  8. notegen-0.1.0/notes_gen/output/formatter.py +36 -0
  9. notegen-0.1.0/notes_gen/output/writer.py +40 -0
  10. notegen-0.1.0/notes_gen/processing/__init__.py +0 -0
  11. notegen-0.1.0/notes_gen/processing/chunker.py +32 -0
  12. notegen-0.1.0/notes_gen/processing/filter.py +32 -0
  13. notegen-0.1.0/notes_gen/processing/llm.py +46 -0
  14. notegen-0.1.0/notes_gen/processing/merger.py +56 -0
  15. notegen-0.1.0/notes_gen/sources/__init__.py +0 -0
  16. notegen-0.1.0/notes_gen/sources/text.py +58 -0
  17. notegen-0.1.0/notes_gen/sources/web.py +165 -0
  18. notegen-0.1.0/notes_gen/sources/youtube.py +162 -0
  19. notegen-0.1.0/pyproject.toml +71 -0
  20. notegen-0.1.0/tests/__init__.py +0 -0
  21. notegen-0.1.0/tests/fixtures/.gitkeep +0 -0
  22. notegen-0.1.0/tests/fixtures/sample_html.html +30 -0
  23. notegen-0.1.0/tests/fixtures/sample_transcript.txt +17 -0
  24. notegen-0.1.0/tests/test_chunker.py +57 -0
  25. notegen-0.1.0/tests/test_cli.py +100 -0
  26. notegen-0.1.0/tests/test_config.py +71 -0
  27. notegen-0.1.0/tests/test_filter.py +47 -0
  28. notegen-0.1.0/tests/test_formatter.py +85 -0
  29. notegen-0.1.0/tests/test_llm.py +77 -0
  30. notegen-0.1.0/tests/test_merger.py +52 -0
  31. notegen-0.1.0/tests/test_text.py +87 -0
  32. notegen-0.1.0/tests/test_web.py +153 -0
  33. notegen-0.1.0/tests/test_writer.py +73 -0
  34. notegen-0.1.0/tests/test_youtube.py +197 -0
@@ -0,0 +1,11 @@
1
+ .venv/
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ .pytest_cache/
6
+ .ruff_cache/
7
+ *.egg-info/
8
+ dist/
9
+ build/
10
+ .coverage
11
+ htmlcov/
notegen-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,91 @@
1
+ Metadata-Version: 2.4
2
+ Name: notegen
3
+ Version: 0.1.0
4
+ Summary: Convert YouTube videos, playlists, and web pages into Obsidian markdown notes using LLMs
5
+ Project-URL: Homepage, https://github.com/sidhunair280/notegen
6
+ Project-URL: Bug Tracker, https://github.com/sidhunair280/notegen/issues
7
+ Author-email: Sidhu Nair <sidhunair280@gmail.com>
8
+ License: MIT
9
+ Keywords: knowledge-management,llm,markdown,notes,obsidian,youtube
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Environment :: Console
12
+ Classifier: Intended Audience :: End Users/Desktop
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Text Processing :: Markup :: Markdown
18
+ Classifier: Topic :: Utilities
19
+ Requires-Python: >=3.11
20
+ Requires-Dist: anyio>=4
21
+ Requires-Dist: beautifulsoup4>=4.12
22
+ Requires-Dist: httpx>=0.27
23
+ Requires-Dist: litellm>=1.40
24
+ Requires-Dist: pyyaml>=6
25
+ Requires-Dist: rich>=13
26
+ Requires-Dist: tiktoken>=0.7
27
+ Requires-Dist: trafilatura>=1.12
28
+ Requires-Dist: typer>=0.12
29
+ Requires-Dist: youtube-transcript-api>=0.6
30
+ Requires-Dist: yt-dlp>=2024.1
31
+ Description-Content-Type: text/markdown
32
+
33
+ # notegen
34
+
35
+ Convert YouTube videos, playlists, and web pages into structured Obsidian-flavored markdown notes using LLMs.
36
+
37
+ ## Install
38
+
39
+ ```bash
40
+ pip install notegen
41
+ ```
42
+
43
+ ## Usage
44
+
45
+ ```bash
46
+ # Auto-detect source type
47
+ notegen https://youtube.com/watch?v=...
48
+ notegen https://example.com/article
49
+ notegen transcript.txt
50
+
51
+ # Explicit commands
52
+ notegen video <youtube-url>
53
+ notegen playlist <playlist-url> [--force]
54
+ notegen web <url>
55
+ notegen text <file-or-stdin>
56
+
57
+ # Config
58
+ notegen config init # create ~/.config/notes-gen/config.yaml
59
+ notegen config show # print resolved config
60
+ ```
61
+
62
+ ## Options
63
+
64
+ | Flag | Description |
65
+ |---|---|
66
+ | `-o / --output-dir` | Override output directory |
67
+ | `-m / --model` | LiteLLM model string (e.g. `openai/gpt-4o`) |
68
+ | `--no-mermaid` | Disable mermaid diagram generation |
69
+ | `--force` | Skip playlist videos without captions instead of aborting |
70
+
71
+ ## Config file (`~/.config/notes-gen/config.yaml`)
72
+
73
+ ```yaml
74
+ output_dir: ~/notes
75
+ model: anthropic/claude-sonnet-4-6
76
+ mermaid: true
77
+ max_concurrent: 5
78
+ web_max_pages: 50
79
+ web_max_depth: 3
80
+ ```
81
+
82
+ API keys are read from environment variables (e.g. `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`).
83
+
84
+ ## Output format
85
+
86
+ Obsidian-flavored markdown with YAML frontmatter, `## / ###` headings, `> [!TIP]` / `> [!WARNING]` callouts, mermaid diagrams, and `[[wikilinks]]`.
87
+
88
+ ## Requirements
89
+
90
+ - Python ≥ 3.11
91
+ - An API key for your chosen LLM provider
@@ -0,0 +1,59 @@
1
+ # notegen
2
+
3
+ Convert YouTube videos, playlists, and web pages into structured Obsidian-flavored markdown notes using LLMs.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install notegen
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```bash
14
+ # Auto-detect source type
15
+ notegen https://youtube.com/watch?v=...
16
+ notegen https://example.com/article
17
+ notegen transcript.txt
18
+
19
+ # Explicit commands
20
+ notegen video <youtube-url>
21
+ notegen playlist <playlist-url> [--force]
22
+ notegen web <url>
23
+ notegen text <file-or-stdin>
24
+
25
+ # Config
26
+ notegen config init # create ~/.config/notes-gen/config.yaml
27
+ notegen config show # print resolved config
28
+ ```
29
+
30
+ ## Options
31
+
32
+ | Flag | Description |
33
+ |---|---|
34
+ | `-o / --output-dir` | Override output directory |
35
+ | `-m / --model` | LiteLLM model string (e.g. `openai/gpt-4o`) |
36
+ | `--no-mermaid` | Disable mermaid diagram generation |
37
+ | `--force` | Skip playlist videos without captions instead of aborting |
38
+
39
+ ## Config file (`~/.config/notes-gen/config.yaml`)
40
+
41
+ ```yaml
42
+ output_dir: ~/notes
43
+ model: anthropic/claude-sonnet-4-6
44
+ mermaid: true
45
+ max_concurrent: 5
46
+ web_max_pages: 50
47
+ web_max_depth: 3
48
+ ```
49
+
50
+ API keys are read from environment variables (e.g. `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`).
51
+
52
+ ## Output format
53
+
54
+ Obsidian-flavored markdown with YAML frontmatter, `## / ###` headings, `> [!TIP]` / `> [!WARNING]` callouts, mermaid diagrams, and `[[wikilinks]]`.
55
+
56
+ ## Requirements
57
+
58
+ - Python ≥ 3.11
59
+ - An API key for your chosen LLM provider
File without changes
@@ -0,0 +1,166 @@
1
+ from pathlib import Path
2
+ from typing import Optional
3
+
4
+ import typer
5
+ import yaml
6
+
7
+ from notes_gen.config import DEFAULT_CONFIG_PATH, Config, load_config, merge_cli_overrides
8
+
9
# Root CLI application; running it without arguments prints the help text.
app = typer.Typer(
    no_args_is_help=True,
    help="Convert YouTube/web content to Obsidian notes.",
)

# Sub-application holding the configuration commands, registered on the
# root app under the name "config".
config_app = typer.Typer(help="Manage configuration.")
app.add_typer(config_app, name="config")

# First-argument values that main() leaves untouched; any other first
# argument that is not an option gets "auto" inserted in front of it.
_KNOWN_SUBCOMMANDS = {"auto", "config", "playlist", "text", "video", "web"}
14
+
15
+
16
def _run_auto(source: str, cfg: Config, force: bool = False) -> None:
    """Pick a pipeline for *source* from its textual form and run it.

    Checks are substring based and applied in this order: YouTube playlist,
    YouTube video, generic http(s) URL, and finally anything else, which is
    handed to the text pipeline. ``force`` is forwarded to the playlist
    pipeline only. The resulting location is echoed to the terminal.
    """
    on_youtube = "youtube.com" in source
    looks_like_playlist = "youtube.com/playlist" in source or (
        "list=" in source and on_youtube
    )
    if looks_like_playlist:
        from notes_gen.sources.youtube import run_playlist_pipeline

        index_path = run_playlist_pipeline(source, cfg, force=force)
        typer.echo(f"Playlist notes written to {index_path.parent}")
        return

    # The remaining pipelines share one call shape, so only the import differs.
    if "youtube.com/watch" in source or "youtu.be/" in source:
        from notes_gen.sources.youtube import run_video_pipeline as pipeline
    elif source.startswith(("http://", "https://")):
        from notes_gen.sources.web import run_web_crawl_pipeline as pipeline
    else:
        from notes_gen.sources.text import run_text_pipeline as pipeline

    output_path = pipeline(source, cfg)
    typer.echo(f"Notes written to {output_path}")
37
+
38
+
39
@config_app.command("init")
def config_init() -> None:
    """Create default config at ~/.config/notes-gen/config.yaml."""
    target = DEFAULT_CONFIG_PATH

    # Never clobber an existing file: report it and exit with status 1.
    if target.exists():
        typer.echo(f"Config already exists: {target}")
        raise typer.Exit(1)

    target.parent.mkdir(parents=True, exist_ok=True)

    # Serialise the dataclass defaults; the Path is stored as a plain string.
    defaults = Config()
    field_names = (
        "output_dir",
        "model",
        "mermaid",
        "max_concurrent",
        "web_max_pages",
        "web_max_depth",
    )
    data = {name: getattr(defaults, name) for name in field_names}
    data["output_dir"] = str(data["output_dir"])

    target.write_text(yaml.dump(data, default_flow_style=False))
    typer.echo(f"Config written to {target}")
57
+
58
+
59
@config_app.command("show")
def config_show() -> None:
    """Print resolved config as YAML."""
    cfg = load_config()
    rendered = yaml.dump(
        dict(
            output_dir=str(cfg.output_dir),
            model=cfg.model,
            mermaid=cfg.mermaid,
            max_concurrent=cfg.max_concurrent,
            web_max_pages=cfg.web_max_pages,
            web_max_depth=cfg.web_max_depth,
        ),
        default_flow_style=False,
    )
    # nl=False: no extra newline is appended after the dumped YAML.
    typer.echo(rendered, nl=False)
72
+
73
+
74
+ @app.command()
75
+ def video(
76
+ url: str = typer.Argument(..., help="YouTube video URL"),
77
+ output_dir: Optional[Path] = typer.Option(None, "--output-dir", "-o"),
78
+ model: Optional[str] = typer.Option(None, "--model", "-m"),
79
+ no_mermaid: bool = typer.Option(False, "--no-mermaid"),
80
+ verbose: bool = typer.Option(False, "--verbose", "-v"),
81
+ force: bool = typer.Option(False, "--force"),
82
+ ) -> None:
83
+ """Generate notes from a YouTube video."""
84
+ from notes_gen.sources.youtube import run_video_pipeline
85
+
86
+ cfg = load_config()
87
+ cfg = merge_cli_overrides(cfg, output_dir=output_dir, model=model, mermaid=not no_mermaid)
88
+ output_path = run_video_pipeline(url, cfg)
89
+ typer.echo(f"Notes written to {output_path}")
90
+
91
+
92
+ @app.command()
93
+ def playlist(
94
+ url: str = typer.Argument(..., help="YouTube playlist URL"),
95
+ output_dir: Optional[Path] = typer.Option(None, "--output-dir", "-o"),
96
+ model: Optional[str] = typer.Option(None, "--model", "-m"),
97
+ no_mermaid: bool = typer.Option(False, "--no-mermaid"),
98
+ verbose: bool = typer.Option(False, "--verbose", "-v"),
99
+ force: bool = typer.Option(False, "--force"),
100
+ ) -> None:
101
+ """Generate notes from a YouTube playlist."""
102
+ from notes_gen.sources.youtube import run_playlist_pipeline
103
+
104
+ cfg = load_config()
105
+ cfg = merge_cli_overrides(cfg, output_dir=output_dir, model=model, mermaid=not no_mermaid)
106
+ index_path = run_playlist_pipeline(url, cfg, force=force)
107
+ typer.echo(f"Playlist notes written to {index_path.parent}")
108
+
109
+
110
+ @app.command()
111
+ def web(
112
+ url: str = typer.Argument(..., help="Web URL to fetch"),
113
+ output_dir: Optional[Path] = typer.Option(None, "--output-dir", "-o"),
114
+ model: Optional[str] = typer.Option(None, "--model", "-m"),
115
+ no_mermaid: bool = typer.Option(False, "--no-mermaid"),
116
+ verbose: bool = typer.Option(False, "--verbose", "-v"),
117
+ ) -> None:
118
+ """Generate notes from a web page (crawls same-domain links)."""
119
+ from notes_gen.sources.web import run_web_crawl_pipeline
120
+
121
+ cfg = load_config()
122
+ cfg = merge_cli_overrides(cfg, output_dir=output_dir, model=model, mermaid=not no_mermaid)
123
+ output_path = run_web_crawl_pipeline(url, cfg)
124
+ typer.echo(f"Notes written to {output_path}")
125
+
126
+
127
+ @app.command()
128
+ def text(
129
+ source: str = typer.Argument(..., help="File path or '-' for stdin"),
130
+ output_dir: Optional[Path] = typer.Option(None, "--output-dir", "-o"),
131
+ model: Optional[str] = typer.Option(None, "--model", "-m"),
132
+ no_mermaid: bool = typer.Option(False, "--no-mermaid"),
133
+ verbose: bool = typer.Option(False, "--verbose", "-v"),
134
+ ) -> None:
135
+ """Generate notes from a text file or stdin."""
136
+ from notes_gen.sources.text import run_text_pipeline
137
+
138
+ cfg = load_config()
139
+ cfg = merge_cli_overrides(cfg, output_dir=output_dir, model=model, mermaid=not no_mermaid)
140
+ output_path = run_text_pipeline(source, cfg)
141
+ typer.echo(f"Notes written to {output_path}")
142
+
143
+
144
+ @app.command()
145
+ def auto(
146
+ source: str = typer.Argument(..., help="YouTube URL, web URL, or file path"),
147
+ output_dir: Optional[Path] = typer.Option(None, "--output-dir", "-o"),
148
+ model: Optional[str] = typer.Option(None, "--model", "-m"),
149
+ no_mermaid: bool = typer.Option(False, "--no-mermaid"),
150
+ verbose: bool = typer.Option(False, "--verbose", "-v"),
151
+ force: bool = typer.Option(False, "--force"),
152
+ ) -> None:
153
+ """Auto-detect source type and generate notes."""
154
+ cfg = load_config()
155
+ cfg = merge_cli_overrides(cfg, output_dir=output_dir, model=model, mermaid=not no_mermaid)
156
+ _run_auto(source, cfg, force=force)
157
+
158
+
159
def main() -> None:
    """Entry point: inject 'auto' subcommand when bare URL/file is passed."""
    import sys

    first = sys.argv[1] if len(sys.argv) > 1 else None
    # Options (leading "-") and known subcommand names pass through untouched;
    # any other first argument is treated as a source for the `auto` command.
    is_bare_source = (
        first is not None
        and not first.startswith("-")
        and first not in _KNOWN_SUBCOMMANDS
    )
    if is_bare_source:
        sys.argv.insert(1, "auto")
    app()
@@ -0,0 +1,56 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, replace
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+ import yaml
8
+
9
# Location of the YAML config file: ~/.config/notes-gen/config.yaml
DEFAULT_CONFIG_PATH = Path.home().joinpath(".config", "notes-gen", "config.yaml")

# Model string used when neither the config file nor the CLI supplies one.
DEFAULT_MODEL = "anthropic/claude-sonnet-4-6"
12
+
13
+
14
@dataclass
class Config:
    """Resolved notegen settings.

    ``output_dir`` may be given as ``None``, a string, or a ``Path``; after
    construction it is always a ``Path`` with a leading ``~`` expanded to the
    user's home directory. ``None`` selects ``~/notes``.
    """

    # Directory for generated notes (None means "use ~/notes").
    output_dir: Path = None  # type: ignore[assignment]
    # Model string handed to the LLM call.
    model: str = DEFAULT_MODEL
    # Whether mermaid diagrams are enabled.
    mermaid: bool = True
    # The three limits below are stored here; their consumers are outside
    # this module.
    max_concurrent: int = 5
    web_max_pages: int = 50
    web_max_depth: int = 3

    def __post_init__(self) -> None:
        """Normalise ``output_dir`` to an expanded ``Path``."""
        chosen = Path.home() / "notes" if self.output_dir is None else self.output_dir
        # expanduser(): a value such as "~/notes" (the form used in the
        # example config) would otherwise name a literal "~" directory
        # relative to the current working directory.
        self.output_dir = Path(chosen).expanduser()
27
+
28
+
29
def load_config(path: Path = DEFAULT_CONFIG_PATH) -> Config:
    """Build a ``Config`` from the YAML file at *path*.

    A missing file or an empty document yields the defaults. Only the keys
    known to ``Config`` are read; anything else in the file is ignored.

    Args:
        path: YAML file to read.

    Returns:
        The defaults overlaid with the values found in the file.
    """
    if not path.exists():
        return Config()

    # Decode explicitly as UTF-8 so the result does not depend on the locale.
    raw = yaml.safe_load(path.read_text(encoding="utf-8")) or {}

    kwargs: dict = {}
    # A bare `~` is YAML for null; treat that like an absent key instead of
    # passing None to Path(), which raises TypeError.
    if raw.get("output_dir") is not None:
        # Expand "~/..." so it refers to the home directory rather than a
        # literal "~" folder.
        kwargs["output_dir"] = Path(raw["output_dir"]).expanduser()

    passthrough = ("model", "mermaid", "max_concurrent", "web_max_pages", "web_max_depth")
    kwargs.update({name: raw[name] for name in passthrough if name in raw})
    return Config(**kwargs)
40
+
41
+
42
def merge_cli_overrides(
    cfg: Config,
    *,
    output_dir: Optional[Path] = None,
    model: Optional[str] = None,
    mermaid: Optional[bool] = None,
) -> Config:
    """Return a copy of *cfg* with every non-``None`` override applied.

    ``None`` means "not specified", so the matching field of *cfg* is kept.
    *cfg* itself is not modified.
    """
    candidates = {"output_dir": output_dir, "model": model, "mermaid": mermaid}
    supplied = {name: value for name, value in candidates.items() if value is not None}
    return replace(cfg, **supplied)
File without changes
@@ -0,0 +1,36 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ import unicodedata
5
+ from datetime import date
6
+
7
+ import yaml
8
+
9
+
10
def slugify(text: str) -> str:
    """Turn *text* into a lowercase, hyphen-separated ASCII slug.

    Accented characters are reduced to their ASCII base letter; characters
    with no ASCII form are dropped, so text made up only of such characters
    yields an empty string. Runs of whitespace, underscores and hyphens
    become a single hyphen, and hyphens at either end are removed.

    Args:
        text: Arbitrary title or label.

    Returns:
        The slug, or ``""`` when nothing usable remains.
    """
    if not text:
        return ""

    # NFKD splits accented letters into base letter + combining mark; the
    # ASCII round-trip then discards the marks and any other non-ASCII char.
    decomposed = unicodedata.normalize("NFKD", text)
    ascii_only = decomposed.encode("ascii", "ignore").decode("ascii")

    kept = re.sub(r"[^\w\s-]", "", ascii_only.lower())
    # Hyphens are part of the separator class so "a - b" gives "a-b"
    # rather than "a---b".
    return re.sub(r"[\s_-]+", "-", kept).strip("-")
20
+
21
+
22
def build_frontmatter(
    title: str,
    source: str,
    type: str,
    tags: list[str],
    date: date,
) -> str:
    """Render a YAML frontmatter block for a note.

    The five values are dumped as a block-style YAML mapping (``date`` in
    ISO format, non-ASCII text left unescaped) and wrapped between two
    ``---`` lines, with a newline after the closing one.
    """
    fields = dict(
        title=title,
        source=source,
        type=type,
        tags=tags,
        date=date.isoformat(),
    )
    body = yaml.dump(fields, default_flow_style=False, allow_unicode=True)
    return "---\n" + body + "---\n"
@@ -0,0 +1,40 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+
6
def write_note(
    path: Path,
    content: str,
    overwrite_policy: str = "overwrite",
) -> Path:
    """Write *content* to *path* as UTF-8, creating parent directories.

    When *path* already exists, *overwrite_policy* decides what happens:

    * ``"skip"``: leave the file alone and return *path*.
    * ``"rename"``: write to the first free ``<stem>-<n><suffix>`` sibling
      (n = 1, 2, ...) and return that path.
    * anything else: replace the existing file.

    Returns:
        The path that now holds the note (or the untouched *path* on skip).
    """
    path.parent.mkdir(parents=True, exist_ok=True)

    target = path
    if path.exists():
        if overwrite_policy == "skip":
            return path
        if overwrite_policy == "rename":
            n = 1
            # Probe numbered siblings until one is free.
            while (target := path.parent / f"{path.stem}-{n}{path.suffix}").exists():
                n += 1

    target.write_text(content, encoding="utf-8")
    return target
29
+
30
+
31
def write_index(playlist_dir: Path, video_slugs: list[str]) -> Path:
    """Write ``index.md`` in *playlist_dir*, linking to every slug.

    The heading is the directory name with hyphens turned into spaces and
    title-cased; each slug becomes a ``- [[slug]]`` bullet. The directory is
    created if needed and an existing index is replaced.

    Returns:
        Path of the written ``index.md``.
    """
    playlist_dir.mkdir(parents=True, exist_ok=True)

    heading = playlist_dir.name.replace("-", " ").title()
    bullets = [f"- [[{slug}]]" for slug in video_slugs]
    # The heading line carries its own "\n", which leaves a blank line
    # between the heading and the first bullet once everything is joined.
    content = "\n".join([f"# {heading}\n", *bullets]) + "\n"

    index_path = playlist_dir / "index.md"
    index_path.write_text(content, encoding="utf-8")
    return index_path
File without changes
@@ -0,0 +1,32 @@
1
+ from __future__ import annotations
2
+
3
+ import tiktoken
4
+
5
# Shared tiktoken encoding ("cl100k_base"), created once when the module is
# imported and reused for every count/chunk operation below.
_ENCODING = tiktoken.get_encoding("cl100k_base")
6
+
7
+
8
def count_tokens(text: str) -> int:
    """Return how many tokens *text* encodes to with the module encoding."""
    token_ids = _ENCODING.encode(text)
    return len(token_ids)
10
+
11
+
12
def chunk_text(text: str, max_tokens: int = 12000, overlap: int = 200) -> list[str]:
    """Split *text* into chunks of at most *max_tokens* tokens.

    Consecutive chunks share *overlap* tokens. Text that fits in a single
    chunk is returned unchanged as a one-element list; blank text gives an
    empty list.

    Args:
        text: Text to split.
        max_tokens: Upper bound on tokens per chunk.
        overlap: Number of tokens repeated at the start of the next chunk.

    Returns:
        The decoded chunks, in order.

    Raises:
        ValueError: If the text must be split and *overlap* is negative or
            not smaller than *max_tokens*.
    """
    if not text.strip():
        return []

    tokens = _ENCODING.encode(text)
    total = len(tokens)
    if total <= max_tokens:
        return [text]

    # Checked only once splitting is actually required, so calls that never
    # reached the loop keep working. With overlap >= max_tokens the window
    # would never advance (endless loop); a negative overlap would skip
    # tokens between chunks.
    if not 0 <= overlap < max_tokens:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < max_tokens "
            f"(got overlap={overlap}, max_tokens={max_tokens})"
        )

    stride = max_tokens - overlap
    # A window starting at `start` is needed only while tokens remain beyond
    # the previous window's end, i.e. while start + overlap < total.
    # NOTE(review): slices are cut on token boundaries; whether a cut can
    # split a multi-byte character depends on the encoding - confirm.
    return [
        _ENCODING.decode(tokens[start : start + max_tokens])
        for start in range(0, total - overlap, stride)
    ]
@@ -0,0 +1,32 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
# Case-insensitive patterns for promotional / call-to-action text. A line
# matching any of them is dropped by remove_meta().
_META_PATTERNS = [
    # Sponsorship mentions. The previous `sponsored?` only made the final
    # "d" optional, so plain "sponsor" / "sponsors" never matched.
    r"(?i)\b(sponsor(?:ed|s|ing|ship)?)\b.{0,120}",
    # Engagement requests.
    r"(?i)\bsmash\s+that\s+(like|subscribe)\b.{0,80}",
    r"(?i)\blike\s+(and\s+)?subscribe\b.{0,80}",
    r"(?i)\bsubscribe\s+(to\s+)?(my|the|this)\s+(channel|newsletter|podcast)\b.{0,80}",
    r"(?i)\bfollow\s+(me\s+)?on\s+(twitter|instagram|tiktok|linkedin|facebook)\b.{0,80}",
    # Merchandise, funding and community plugs.
    r"(?i)\bcheck\s+out\s+my\s+merch\b.{0,100}",
    r"(?i)\bsupport\s+(me\s+)?on\s+patreon\b.{0,100}",
    r"(?i)\bjoin\s+(my\s+)?discord\b.{0,80}",
    # Discount / promo-code offers.
    r"(?i)\bget\s+\d+%\s+off\b.{0,120}",
    r"(?i)\buse\s+(code|promo)\s+\w+\s+(for|to\s+get)\b.{0,120}",
    # Specific brand names.
    r"(?i)\bnordvpn\b.{0,150}",
    r"(?i)\bteespring\b.{0,100}",
]

# Compiled once at import time, in the same order as _META_PATTERNS.
_COMPILED = [re.compile(p) for p in _META_PATTERNS]
21
+
22
+
23
def remove_meta(text: str) -> str:
    """Drop every line of *text* that matches a promotional pattern.

    The text is split on ``"\\n"``; a line is removed in full as soon as any
    compiled pattern is found in it, and the remaining lines are re-joined
    with ``"\\n"``. Falsy input is returned unchanged.
    """
    if not text:
        return text

    kept = [
        line
        for line in text.split("\n")
        if not any(pattern.search(line) for pattern in _COMPILED)
    ]
    return "\n".join(kept)
@@ -0,0 +1,46 @@
1
+ from __future__ import annotations
2
+
3
+ import litellm
4
+
5
+ from notes_gen.config import Config
6
+
7
# System message sent with every completion request: fixes the note-taking
# role and the formatting rules the model must follow.
_SYSTEM_PROMPT = """\
You are an expert note-taker converting transcripts and articles into structured Obsidian notes.

Rules:
- Use Obsidian-flavored markdown: YAML frontmatter (omit here — added externally),
## and ### headings only
- Use `> [!TIP]` and `> [!WARNING]` callouts for important insights
- Use mermaid diagrams for flows and architectures when appropriate
- Use [[wikilinks]] for cross-references to related concepts
- Be comprehensive — never truncate to hit a length limit
- Auto-infer relevant tags from content
- Write for a technical audience learning the subject
"""

# User message template; `{chunk}` is filled in with str.format() for each
# piece of source text.
_USER_PROMPT_TEMPLATE = """\
Convert the following content into structured Obsidian notes:

<content>
{chunk}
</content>

Produce well-organized markdown notes with clear headings, key concepts,
code examples where relevant, and callouts for important points.
"""
31
+
32
+
33
def generate_notes(chunks: list[str], cfg: Config) -> str:
    """Generate notes for each chunk and join them into one string.

    One ``litellm.completion`` call is made per chunk, in order, using
    ``cfg.model`` and a temperature of 0.3. The per-chunk outputs are joined
    with a blank line between them.

    Args:
        chunks: Pieces of source text, each sent as a separate request.
        cfg: Settings; only ``cfg.model`` is read here.

    Returns:
        The concatenated notes; ``""`` when there are no chunks or no
        response carried any text.
    """
    results: list[str] = []
    for chunk in chunks:
        response = litellm.completion(
            model=cfg.model,
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user", "content": _USER_PROMPT_TEMPLATE.format(chunk=chunk)},
            ],
            temperature=0.3,
        )
        content = response.choices[0].message.content
        # `content` can be None for a response without text; joining a None
        # would raise TypeError, so empty responses are skipped.
        if content:
            results.append(content)
    return "\n\n".join(results)
@@ -0,0 +1,56 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
# A markdown heading of level 1-3 at the start of a line: group 1 is the
# run of "#" characters, group 2 the heading text.
_HEADER_RE = re.compile(r"^(#{1,3})\s+(.+)$", flags=re.MULTILINE)
6
+
7
+
8
def merge(notes: list[str]) -> str:
    """Combine several markdown notes, keeping each heading only once.

    Every note is split into (header, body) sections. A section whose
    header line (stripped, case-insensitive) was already seen is dropped
    together with its body; headerless sections are always kept. Surviving
    sections are joined with blank lines, and blank notes are ignored.

    Returns:
        The merged markdown, or ``""`` when nothing remains.
    """
    seen: set[str] = set()
    kept: list[tuple[str, str]] = []

    for note in notes:
        if not note.strip():
            continue
        for header, body in _split_into_sections(note):
            key = header.strip().lower() if header else ""
            if key:
                if key in seen:
                    continue
                seen.add(key)
            kept.append((header, body))

    rendered = (
        f"{header}\n\n{body.strip()}" if header else body.strip()
        for header, body in kept
    )
    # Empty fragments (e.g. whitespace-only preambles) are left out.
    return "\n\n".join(part for part in rendered if part)
38
+
39
+
40
# Opening or closing line of a fenced code block (``` or ~~~).
_FENCE_RE = re.compile(r"^[ \t]*(?:`{3,}|~{3,})", re.MULTILINE)


def _split_into_sections(text: str) -> list[tuple[str, str]]:
    """Split text into (header, body) pairs. First section may have no header.

    Heading-like lines inside fenced code blocks (for example ``# comment``
    in a shell or Python snippet) are not treated as headings, so code
    examples stay attached to the section that contains them.

    Args:
        text: Markdown text of a single note.

    Returns:
        Sections in document order. ``header`` is the full heading line, or
        ``""`` for text before the first heading (or when there is none);
        ``body`` has its leading newlines removed.
    """
    fence_starts = [m.start() for m in _FENCE_RE.finditer(text)]

    def _in_code_block(pos: int) -> bool:
        # An odd number of fence lines before `pos` means a block is open.
        return sum(1 for start in fence_starts if start < pos) % 2 == 1

    positions = [
        (m.start(), m.group())
        for m in _HEADER_RE.finditer(text)
        if not _in_code_block(m.start())
    ]
    if not positions:
        return [("", text)]

    sections: list[tuple[str, str]] = []
    first_pos = positions[0][0]
    if first_pos > 0:
        # Preamble before the first heading.
        sections.append(("", text[:first_pos]))

    # Each section runs up to the next heading, the last one to end of text.
    ends = [pos for pos, _ in positions[1:]] + [len(text)]
    for (pos, header), end in zip(positions, ends):
        body = text[pos + len(header) : end].lstrip("\n")
        sections.append((header, body))

    return sections
File without changes