notegen 1.0.2__tar.gz → 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {notegen-1.0.2 → notegen-2.0.0}/PKG-INFO +116 -11
  2. notegen-2.0.0/README.md +261 -0
  3. notegen-2.0.0/notes_gen/cache.py +63 -0
  4. notegen-2.0.0/notes_gen/cli.py +525 -0
  5. {notegen-1.0.2 → notegen-2.0.0}/notes_gen/config.py +34 -2
  6. notegen-2.0.0/notes_gen/output/formats.py +73 -0
  7. notegen-2.0.0/notes_gen/processing/dry_run.py +101 -0
  8. notegen-2.0.0/notes_gen/processing/llm.py +229 -0
  9. {notegen-1.0.2 → notegen-2.0.0}/notes_gen/processing/merger.py +25 -2
  10. {notegen-1.0.2 → notegen-2.0.0}/notes_gen/sources/text.py +35 -4
  11. notegen-2.0.0/notes_gen/sources/watch.py +83 -0
  12. notegen-2.0.0/notes_gen/sources/web.py +246 -0
  13. notegen-2.0.0/notes_gen/sources/youtube.py +276 -0
  14. {notegen-1.0.2 → notegen-2.0.0}/pyproject.toml +2 -1
  15. notegen-2.0.0/tests/fixtures/sample_html.html +65 -0
  16. notegen-2.0.0/tests/test_cache.py +85 -0
  17. notegen-2.0.0/tests/test_cli.py +310 -0
  18. {notegen-1.0.2 → notegen-2.0.0}/tests/test_config.py +27 -0
  19. notegen-2.0.0/tests/test_formats.py +65 -0
  20. {notegen-1.0.2 → notegen-2.0.0}/tests/test_llm.py +128 -9
  21. notegen-2.0.0/tests/test_merger.py +115 -0
  22. {notegen-1.0.2 → notegen-2.0.0}/tests/test_text.py +87 -87
  23. notegen-2.0.0/tests/test_watch.py +91 -0
  24. {notegen-1.0.2 → notegen-2.0.0}/tests/test_web.py +178 -153
  25. {notegen-1.0.2 → notegen-2.0.0}/tests/test_youtube.py +251 -197
  26. notegen-1.0.2/README.md +0 -157
  27. notegen-1.0.2/notes_gen/cli.py +0 -199
  28. notegen-1.0.2/notes_gen/processing/llm.py +0 -128
  29. notegen-1.0.2/notes_gen/sources/web.py +0 -165
  30. notegen-1.0.2/notes_gen/sources/youtube.py +0 -162
  31. notegen-1.0.2/tests/fixtures/sample_html.html +0 -30
  32. notegen-1.0.2/tests/test_cli.py +0 -139
  33. notegen-1.0.2/tests/test_merger.py +0 -52
  34. {notegen-1.0.2 → notegen-2.0.0}/.gitignore +0 -0
  35. {notegen-1.0.2 → notegen-2.0.0}/notes_gen/__init__.py +0 -0
  36. {notegen-1.0.2 → notegen-2.0.0}/notes_gen/output/__init__.py +0 -0
  37. {notegen-1.0.2 → notegen-2.0.0}/notes_gen/output/formatter.py +0 -0
  38. {notegen-1.0.2 → notegen-2.0.0}/notes_gen/output/writer.py +0 -0
  39. {notegen-1.0.2 → notegen-2.0.0}/notes_gen/processing/__init__.py +0 -0
  40. {notegen-1.0.2 → notegen-2.0.0}/notes_gen/processing/chunker.py +0 -0
  41. {notegen-1.0.2 → notegen-2.0.0}/notes_gen/processing/filter.py +0 -0
  42. {notegen-1.0.2 → notegen-2.0.0}/notes_gen/sources/__init__.py +0 -0
  43. {notegen-1.0.2 → notegen-2.0.0}/tests/__init__.py +0 -0
  44. {notegen-1.0.2 → notegen-2.0.0}/tests/fixtures/.gitkeep +0 -0
  45. {notegen-1.0.2 → notegen-2.0.0}/tests/fixtures/sample_transcript.txt +0 -0
  46. {notegen-1.0.2 → notegen-2.0.0}/tests/test_chunker.py +0 -0
  47. {notegen-1.0.2 → notegen-2.0.0}/tests/test_filter.py +0 -0
  48. {notegen-1.0.2 → notegen-2.0.0}/tests/test_formatter.py +0 -0
  49. {notegen-1.0.2 → notegen-2.0.0}/tests/test_writer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: notegen
3
- Version: 1.0.2
3
+ Version: 2.0.0
4
4
  Summary: Convert YouTube videos, playlists, and web pages into Obsidian markdown notes using LLMs
5
5
  Project-URL: Homepage, https://github.com/moneytosms/notegen
6
6
  Project-URL: Bug Tracker, https://github.com/moneytosms/notegen/issues
@@ -26,12 +26,15 @@ Requires-Dist: rich>=13
26
26
  Requires-Dist: tiktoken>=0.7
27
27
  Requires-Dist: trafilatura>=1.12
28
28
  Requires-Dist: typer>=0.12
29
+ Requires-Dist: watchfiles>=1.2.0
29
30
  Requires-Dist: youtube-transcript-api>=0.6
30
31
  Requires-Dist: yt-dlp>=2024.1
31
32
  Description-Content-Type: text/markdown
32
33
 
33
34
  # notegen
34
35
 
36
+ [![CI](https://github.com/moneytosms/notegen/actions/workflows/ci.yml/badge.svg)](https://github.com/moneytosms/notegen/actions/workflows/ci.yml)
37
+
35
38
  Convert YouTube videos, playlists, and web pages into structured Obsidian-flavored markdown notes using LLMs.
36
39
 
37
40
  ## Install
@@ -49,7 +52,10 @@ notegen config init
49
52
  # 2. Open config and add your API key
50
53
  notegen config open
51
54
 
52
- # 3. Generate notes
55
+ # 3. Verify everything works
56
+ notegen doctor
57
+
58
+ # 4. Generate notes
53
59
  notegen https://youtube.com/watch?v=...
54
60
  ```
55
61
 
@@ -64,25 +70,46 @@ notegen transcript.txt
64
70
 
65
71
  # Explicit commands
66
72
  notegen video <youtube-url>
67
- notegen playlist <playlist-url> [--force]
73
+ notegen playlist <playlist-url> [--force] [--force-restart]
68
74
  notegen web <url>
69
75
  notegen text <file-or-stdin>
70
76
  notegen text - # stdin
71
77
 
78
+ # Watch a folder — auto-process new .txt/.md files
79
+ notegen watch ./inbox
80
+
81
+ # Dry run — estimate tokens/cost without calling LLM
82
+ notegen -n https://youtube.com/watch?v=...
83
+ notegen video <url> --dry-run
84
+
85
+ # Output format
86
+ notegen video <url> --format logseq
87
+ notegen web <url> --format plain
88
+
72
89
  # Config
73
- notegen config init # create config file
74
- notegen config open # open config in your default editor
75
- notegen config show # print resolved config
90
+ notegen config init # create config file
91
+ notegen config open # open config in your default editor
92
+ notegen config show # print resolved config
93
+ notegen config validate # check structure + API key presence
94
+ notegen doctor # config check + real test API call
95
+
96
+ # Cache
97
+ notegen cache clear # delete cached entries in ~/.cache/notegen/
76
98
  ```
77
99
 
78
100
  ## Options
79
101
 
80
102
  | Flag | Description |
81
103
  |---|---|
82
- | `-o / --output-dir` | Override output directory |
83
- | `-m / --model` | LiteLLM model string (e.g. `groq/llama-3.3-70b-versatile`) |
104
+ | `-o / --output-dir PATH` | Override output directory |
105
+ | `-m / --model TEXT` | LiteLLM model string (e.g. `groq/llama-3.3-70b-versatile`) |
106
+ | `-v / --verbose` | Show chunk count, token usage, model/key selection, crawl status |
84
107
  | `--no-mermaid` | Disable mermaid diagram generation |
108
+ | `--no-cache` | Skip cache read/write for this run |
109
+ | `-n / --dry-run` | Print token/cost estimate; skip LLM call |
110
+ | `--format TEXT` | Output format: `obsidian` (default) · `logseq` · `plain` · `roam` |
85
111
  | `--force` | Skip playlist videos without captions instead of aborting |
112
+ | `--force-restart` | Ignore playlist resume file, reprocess all videos |
86
113
 
87
114
  ## Config file
88
115
 
@@ -106,6 +133,19 @@ model: anthropic/claude-sonnet-4-6
106
133
  output_dir: ~/notes
107
134
  mermaid: true
108
135
 
136
+ # Output format: obsidian (default) | logseq | plain | roam
137
+ output_format: obsidian
138
+
139
+ # Caching — transcripts + LLM output cached in ~/.cache/notegen/
140
+ # Set to false to always re-fetch and re-generate
141
+ cache: true
142
+
143
+ # Token budget — compress output if it exceeds this many tokens (0 = no limit)
144
+ max_output_tokens: 0
145
+
146
+ # Fuzzy dedup — skip near-duplicate sections in merged notes (Jaccard threshold)
147
+ merger_similarity_threshold: 0.7
148
+
109
149
  # API key rotation — add multiple keys per provider.
110
150
  # notegen picks one at random each request (useful for free-tier rate limits).
111
151
  api_keys:
@@ -139,8 +179,6 @@ web_max_pages: 50
139
179
  web_max_depth: 3
140
180
 
141
181
  # Rate limiting & retry (important for free-tier providers like Groq, Gemini)
142
- # On a 429 error: cools down the offending key, rotates to another if available,
143
- # otherwise waits using Retry-After header or exponential backoff.
144
182
  max_retries: 5
145
183
  retry_base_delay: 60.0 # seconds; backoff = base * 2^attempt
146
184
  ```
@@ -164,6 +202,72 @@ retry_base_delay: 60.0 # seconds; backoff = base * 2^attempt
164
202
 
165
203
  Any provider supported by [LiteLLM](https://docs.litellm.ai/docs/providers) works.
166
204
 
205
+ ## Env var API keys
206
+
207
+ As an alternative to the config file, set `NOTEGEN_<PROVIDER>_KEY` env vars. These are used as a fallback when no keys are configured for a provider:
208
+
209
+ ```bash
210
+ export NOTEGEN_GROQ_KEY=gsk_...
211
+ export NOTEGEN_ANTHROPIC_KEY=sk-ant-...
212
+ export NOTEGEN_GEMINI_KEY=AIzaSy...
213
+ ```
214
+
215
+ Config keys take priority over env vars. Env vars are useful for CI or server use.
216
+
217
+ ## Caching
218
+
219
+ Transcripts and LLM-generated notes are cached in `~/.cache/notegen/` (transcripts keyed on URL; notes keyed on URL + model). Re-running the same source skips fetch and LLM calls entirely.
220
+
221
+ ```bash
222
+ notegen video <url> # first run: fetches + generates + caches
223
+ notegen video <url> # second run: serves from cache instantly
224
+ notegen video <url> --no-cache # bypass cache for this run
225
+ notegen cache clear # wipe all cached files
226
+ ```
227
+
228
+ ## Dry run
229
+
230
+ Estimate tokens and cost before committing to a run:
231
+
232
+ ```bash
233
+ notegen -n https://youtube.com/playlist?list=...
234
+ ```
235
+
236
+ Prints a Rich table with chunk count, token count, estimated cost, and estimated generation time. No LLM calls are made, no files are written.
237
+
238
+ ## Output formats
239
+
240
+ Use `--format` to target different note-taking apps:
241
+
242
+ | Format | Syntax style |
243
+ |---|---|
244
+ | `obsidian` (default) | `[[wikilinks]]`, `> [!TIP]` callouts, mermaid diagrams |
245
+ | `logseq` | Bullet-based, `#+BEGIN_TIP` blocks |
246
+ | `plain` | Clean markdown, no app-specific syntax |
247
+ | `roam` | `#[[hashtag refs]]` |
248
+
249
+ ## Watch mode
250
+
251
+ Drop files into a folder and notegen auto-processes them:
252
+
253
+ ```bash
254
+ notegen watch ./inbox --output-dir ./notes
255
+ ```
256
+
257
+ - Processes existing unprocessed `.txt`/`.md` files on startup
258
+ - Watches for new files; processes each as it appears
259
+ - Tracks processed files in `.watch-state.json` (won't reprocess on restart)
260
+ - Ctrl+C exits cleanly
261
+
262
+ ## Playlist resume
263
+
264
+ Long playlists are resumable. Progress is saved to `.progress.json` in the output folder after each video. If a run is interrupted, re-running the same command skips already-completed videos.
265
+
266
+ ```bash
267
+ notegen playlist <url> # resumes from where it left off
268
+ notegen playlist <url> --force-restart # ignore progress, reprocess all
269
+ ```
270
+
167
271
  ## Rate limiting
168
272
 
169
273
  Free-tier providers (Groq, Gemini, Together AI, etc.) enforce strict TPM/RPM limits. notegen handles 429 errors automatically:
@@ -175,12 +279,13 @@ With the defaults (`max_retries: 5`, `retry_base_delay: 60`), the wait sequence
175
279
 
176
280
  ## Output format
177
281
 
178
- Obsidian-flavored markdown:
282
+ Obsidian-flavored markdown (default):
179
283
  - YAML frontmatter (`title`, `source`, `type`, `tags`, `date`)
180
284
  - `##` / `###` headings only
181
285
  - `> [!TIP]` / `> [!WARNING]` callouts
182
286
  - Mermaid diagrams for flows and architectures
183
287
  - `[[wikilinks]]` for cross-references
288
+ - Tags auto-inferred by LLM from content
184
289
  - Playlist → folder + `index.md` with wikilinks to each video note
185
290
 
186
291
  ## Requirements
@@ -0,0 +1,261 @@
1
+ # notegen
2
+
3
+ [![CI](https://github.com/moneytosms/notegen/actions/workflows/ci.yml/badge.svg)](https://github.com/moneytosms/notegen/actions/workflows/ci.yml)
4
+
5
+ Convert YouTube videos, playlists, and web pages into structured Obsidian-flavored markdown notes using LLMs.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ pip install notegen
11
+ ```
12
+
13
+ ## Quick start
14
+
15
+ ```bash
16
+ # 1. Create config
17
+ notegen config init
18
+
19
+ # 2. Open config and add your API key
20
+ notegen config open
21
+
22
+ # 3. Verify everything works
23
+ notegen doctor
24
+
25
+ # 4. Generate notes
26
+ notegen https://youtube.com/watch?v=...
27
+ ```
28
+
29
+ ## Usage
30
+
31
+ ```bash
32
+ # Auto-detect source type (bare URL or file)
33
+ notegen https://youtube.com/watch?v=...
34
+ notegen https://youtube.com/playlist?list=...
35
+ notegen https://example.com/article
36
+ notegen transcript.txt
37
+
38
+ # Explicit commands
39
+ notegen video <youtube-url>
40
+ notegen playlist <playlist-url> [--force] [--force-restart]
41
+ notegen web <url>
42
+ notegen text <file-or-stdin>
43
+ notegen text - # stdin
44
+
45
+ # Watch a folder — auto-process new .txt/.md files
46
+ notegen watch ./inbox
47
+
48
+ # Dry run — estimate tokens/cost without calling LLM
49
+ notegen -n https://youtube.com/watch?v=...
50
+ notegen video <url> --dry-run
51
+
52
+ # Output format
53
+ notegen video <url> --format logseq
54
+ notegen web <url> --format plain
55
+
56
+ # Config
57
+ notegen config init # create config file
58
+ notegen config open # open config in your default editor
59
+ notegen config show # print resolved config
60
+ notegen config validate # check structure + API key presence
61
+ notegen doctor # config check + real test API call
62
+
63
+ # Cache
64
+ notegen cache clear # delete cached entries in ~/.cache/notegen/
65
+ ```
66
+
67
+ ## Options
68
+
69
+ | Flag | Description |
70
+ |---|---|
71
+ | `-o / --output-dir PATH` | Override output directory |
72
+ | `-m / --model TEXT` | LiteLLM model string (e.g. `groq/llama-3.3-70b-versatile`) |
73
+ | `-v / --verbose` | Show chunk count, token usage, model/key selection, crawl status |
74
+ | `--no-mermaid` | Disable mermaid diagram generation |
75
+ | `--no-cache` | Skip cache read/write for this run |
76
+ | `-n / --dry-run` | Print token/cost estimate; skip LLM call |
77
+ | `--format TEXT` | Output format: `obsidian` (default) · `logseq` · `plain` · `roam` |
78
+ | `--force` | Skip playlist videos without captions instead of aborting |
79
+ | `--force-restart` | Ignore playlist resume file, reprocess all videos |
80
+
81
+ ## Config file
82
+
83
+ ### Location
84
+
85
+ | OS | Path |
86
+ |---|---|
87
+ | Linux | `~/.config/notes-gen/config.yaml` |
88
+ | macOS | `~/.config/notes-gen/config.yaml` |
89
+ | Windows | `%USERPROFILE%\.config\notes-gen\config.yaml` |
90
+
91
+ Run `notegen config init` to generate a fully-commented template, then `notegen config open` to edit it.
92
+
93
+ ### Full reference (`~/.config/notes-gen/config.yaml`)
94
+
95
+ ```yaml
96
+ # Active model — format: <provider>/<model-name>
97
+ model: anthropic/claude-sonnet-4-6
98
+
99
+ # Output
100
+ output_dir: ~/notes
101
+ mermaid: true
102
+
103
+ # Output format: obsidian (default) | logseq | plain | roam
104
+ output_format: obsidian
105
+
106
+ # Caching — transcripts + LLM output cached in ~/.cache/notegen/
107
+ # Set to false to always re-fetch and re-generate
108
+ cache: true
109
+
110
+ # Token budget — compress output if it exceeds this many tokens (0 = no limit)
111
+ max_output_tokens: 0
112
+
113
+ # Fuzzy dedup — skip near-duplicate sections in merged notes (Jaccard threshold)
114
+ merger_similarity_threshold: 0.7
115
+
116
+ # API key rotation — add multiple keys per provider.
117
+ # notegen picks one at random each request (useful for free-tier rate limits).
118
+ api_keys:
119
+ anthropic:
120
+ - sk-ant-api03-KEY1
121
+ - sk-ant-api03-KEY2 # second key rotated in automatically
122
+ groq:
123
+ - gsk_KEY1
124
+ openai:
125
+ - sk-proj-KEY1
126
+ gemini:
127
+ - AIzaSyKEY1
128
+ nvidia_nim:
129
+ - nvapi-KEY1
130
+ mistral:
131
+ - KEY1
132
+ cohere:
133
+ - KEY1
134
+ together_ai:
135
+ - KEY1
136
+ deepseek:
137
+ - sk-KEY1
138
+ perplexity:
139
+ - pplx-KEY1
140
+ xai:
141
+ - xai-KEY1
142
+
143
+ # Web crawl limits
144
+ max_concurrent: 5
145
+ web_max_pages: 50
146
+ web_max_depth: 3
147
+
148
+ # Rate limiting & retry (important for free-tier providers like Groq, Gemini)
149
+ max_retries: 5
150
+ retry_base_delay: 60.0 # seconds; backoff = base * 2^attempt
151
+ ```
152
+
153
+ ### Supported providers
154
+
155
+ | Provider | Model string example |
156
+ |---|---|
157
+ | Anthropic | `anthropic/claude-sonnet-4-6` |
158
+ | OpenAI | `openai/gpt-4o` |
159
+ | Groq | `groq/llama-3.3-70b-versatile` |
160
+ | Google Gemini | `gemini/gemini-2.0-flash` |
161
+ | NVIDIA NIM | `nvidia_nim/meta/llama-3.1-70b-instruct` |
162
+ | Mistral | `mistral/mistral-large-latest` |
163
+ | Cohere | `cohere/command-r-plus` |
164
+ | Together AI | `together_ai/meta-llama/Llama-3-70b-chat-hf` |
165
+ | DeepSeek | `deepseek/deepseek-chat` |
166
+ | Perplexity | `perplexity/sonar-pro` |
167
+ | xAI (Grok) | `xai/grok-2` |
168
+ | Ollama (local) | `ollama/llama3` |
169
+
170
+ Any provider supported by [LiteLLM](https://docs.litellm.ai/docs/providers) works.
171
+
172
+ ## Env var API keys
173
+
174
+ As an alternative to the config file, set `NOTEGEN_<PROVIDER>_KEY` env vars. These are used as a fallback when no keys are configured for a provider:
175
+
176
+ ```bash
177
+ export NOTEGEN_GROQ_KEY=gsk_...
178
+ export NOTEGEN_ANTHROPIC_KEY=sk-ant-...
179
+ export NOTEGEN_GEMINI_KEY=AIzaSy...
180
+ ```
181
+
182
+ Config keys take priority over env vars. Env vars are useful for CI or server use.
183
+
184
+ ## Caching
185
+
186
+ Transcripts and LLM-generated notes are cached in `~/.cache/notegen/` (transcripts keyed on URL; notes keyed on URL + model). Re-running the same source skips fetch and LLM calls entirely.
187
+
188
+ ```bash
189
+ notegen video <url> # first run: fetches + generates + caches
190
+ notegen video <url> # second run: serves from cache instantly
191
+ notegen video <url> --no-cache # bypass cache for this run
192
+ notegen cache clear # wipe all cached files
193
+ ```
194
+
195
+ ## Dry run
196
+
197
+ Estimate tokens and cost before committing to a run:
198
+
199
+ ```bash
200
+ notegen -n https://youtube.com/playlist?list=...
201
+ ```
202
+
203
+ Prints a Rich table with chunk count, token count, estimated cost, and estimated generation time. No LLM calls are made, no files are written.
204
+
205
+ ## Output formats
206
+
207
+ Use `--format` to target different note-taking apps:
208
+
209
+ | Format | Syntax style |
210
+ |---|---|
211
+ | `obsidian` (default) | `[[wikilinks]]`, `> [!TIP]` callouts, mermaid diagrams |
212
+ | `logseq` | Bullet-based, `#+BEGIN_TIP` blocks |
213
+ | `plain` | Clean markdown, no app-specific syntax |
214
+ | `roam` | `#[[hashtag refs]]` |
215
+
216
+ ## Watch mode
217
+
218
+ Drop files into a folder and notegen auto-processes them:
219
+
220
+ ```bash
221
+ notegen watch ./inbox --output-dir ./notes
222
+ ```
223
+
224
+ - Processes existing unprocessed `.txt`/`.md` files on startup
225
+ - Watches for new files; processes each as it appears
226
+ - Tracks processed files in `.watch-state.json` (won't reprocess on restart)
227
+ - Ctrl+C exits cleanly
228
+
229
+ ## Playlist resume
230
+
231
+ Long playlists are resumable. Progress is saved to `.progress.json` in the output folder after each video. If a run is interrupted, re-running the same command skips already-completed videos.
232
+
233
+ ```bash
234
+ notegen playlist <url> # resumes from where it left off
235
+ notegen playlist <url> --force-restart # ignore progress, reprocess all
236
+ ```
237
+
238
+ ## Rate limiting
239
+
240
+ Free-tier providers (Groq, Gemini, Together AI, etc.) enforce strict TPM/RPM limits. notegen handles 429 errors automatically:
241
+
242
+ 1. Cools down the offending key and rotates to another available key immediately.
243
+ 2. If all keys for the provider are exhausted, waits using the `Retry-After` header value (if present) or exponential backoff (`retry_base_delay * 2^attempt`), then retries.
244
+
245
+ With the defaults (`max_retries: 5`, `retry_base_delay: 60`), the wait sequence is 60s → 120s → 240s → 480s → 960s. Adding multiple API keys from different free accounts is the most effective way to stay under limits.
246
+
247
+ ## Output format
248
+
249
+ Obsidian-flavored markdown (default):
250
+ - YAML frontmatter (`title`, `source`, `type`, `tags`, `date`)
251
+ - `##` / `###` headings only
252
+ - `> [!TIP]` / `> [!WARNING]` callouts
253
+ - Mermaid diagrams for flows and architectures
254
+ - `[[wikilinks]]` for cross-references
255
+ - Tags auto-inferred by LLM from content
256
+ - Playlist → folder + `index.md` with wikilinks to each video note
257
+
258
+ ## Requirements
259
+
260
+ - Python ≥ 3.11
261
+ - API key for at least one supported LLM provider
@@ -0,0 +1,63 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import json
5
+ from datetime import datetime, timezone
6
+ from pathlib import Path
7
+
8
# Directory that holds every cache entry: ``.cache/notegen`` under the
# current user's home directory (resolved once, at import time).
_CACHE_DIR = Path.home().joinpath(".cache", "notegen")
9
+
10
+
11
def _key(data: str) -> str:
    """Return the hexadecimal SHA-256 digest of ``data``.

    The text is encoded with ``str.encode()``'s default encoding before
    hashing, so equal strings always map to the same 64-character key.
    """
    hasher = hashlib.sha256()
    hasher.update(data.encode())
    return hasher.hexdigest()
13
+
14
+
15
def _cache_file(key: str) -> Path:
    """Return the path of the JSON file that stores the entry for ``key``.

    The file lives directly inside ``_CACHE_DIR`` and is named after the
    key with a ``.json`` extension. Nothing is created on disk here.
    """
    filename = f"{key}.json"
    return _CACHE_DIR.joinpath(filename)
17
+
18
+
19
def _read(key: str) -> str | None:
    """Return the cached ``content`` string stored under ``key``, if any.

    Reading is best-effort: a missing, unreadable, or malformed cache file
    is treated as a cache miss rather than an error.

    Args:
        key: Cache key, as produced by ``_key``.

    Returns:
        The stored ``content`` value when the entry exists and holds a
        string; ``None`` otherwise.
    """
    try:
        # Read directly instead of checking ``exists()`` first, so a file
        # that disappears between the check and the read is still a miss.
        raw = _cache_file(key).read_text(encoding="utf-8")
        payload = json.loads(raw)
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: invalid JSON or
        # invalid UTF-8 (JSONDecodeError and UnicodeDecodeError both
        # derive from ValueError).
        return None
    if not isinstance(payload, dict):
        # Valid JSON that is not an object cannot carry a "content" field.
        return None
    content = payload.get("content")
    # Honour the declared return type: anything but a string is a miss.
    return content if isinstance(content, str) else None
27
+
28
+
29
def _write(key: str, content: str, url: str) -> None:
    """Store ``content`` under ``key`` as a JSON cache entry.

    The cache directory is created on demand. The entry records the
    content, the originating ``url``, and a UTC ISO-8601 ``cached_at``
    timestamp taken at write time.
    """
    _CACHE_DIR.mkdir(parents=True, exist_ok=True)
    target = _cache_file(key)
    record = {
        "content": content,
        "url": url,
        "cached_at": datetime.now(timezone.utc).isoformat(),
    }
    serialized = json.dumps(record)
    target.write_text(serialized, encoding="utf-8")
39
+
40
+
41
def get_transcript_cache(url: str) -> str | None:
    """Return the cached transcript for ``url``, or ``None`` on a cache miss.

    Transcript entries are keyed on the URL alone (with a ``transcript:``
    prefix), independent of any model.
    """
    cache_key = _key(f"transcript:{url}")
    return _read(cache_key)
43
+
44
+
45
def set_transcript_cache(url: str, text: str) -> None:
    """Cache the transcript ``text`` fetched from ``url``.

    Uses the same ``transcript:``-prefixed key as ``get_transcript_cache``
    so a later lookup for the same URL finds this entry.
    """
    cache_key = _key(f"transcript:{url}")
    _write(cache_key, text, url)
47
+
48
+
49
def get_notes_cache(url: str, model: str) -> str | None:
    """Return the cached notes for ``url`` generated by ``model``, if any.

    Returns ``None`` on a cache miss.
    """
    # NOTE(review): the key covers only the URL and the model; if other
    # settings (e.g. output format) change the generated notes, confirm
    # that callers account for that before trusting a hit.
    cache_key = _key(f"notes:{url}:{model}")
    return _read(cache_key)
51
+
52
+
53
def set_notes_cache(url: str, model: str, notes: str) -> None:
    """Cache the ``notes`` that ``model`` generated for ``url``.

    The entry is keyed on both the URL and the model, matching
    ``get_notes_cache``.
    """
    cache_key = _key(f"notes:{url}:{model}")
    _write(cache_key, notes, url)
55
+
56
+
57
def clear_cache() -> int:
    """Delete every ``*.json`` entry in the cache directory.

    Only files directly inside ``_CACHE_DIR`` that match ``*.json`` are
    removed; the directory itself is left in place.

    Returns:
        The number of cache files deleted (``0`` when the cache directory
        does not exist).
    """
    removed = 0
    if _CACHE_DIR.exists():
        # Snapshot the matches first so deletion does not interfere with
        # the directory scan.
        entries = list(_CACHE_DIR.glob("*.json"))
        for entry in entries:
            entry.unlink()
            removed += 1
    return removed