yt-briefing 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,110 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Usage:
4
+ * bun src/yt-rating.ts --rating 0|1 [--comment "..."]
5
+ *
6
+ * Channel / id / title / type default to <DATA_DIR>/.cache/pending.json (written by
7
+ * yt-sweep.ts) so the agent only passes --rating (+ optional --comment) — no fragile
8
+ * shell quoting of emoji/quote-laden titles. Explicit flags override:
9
+ * --channel @X --id Y --title "..." --type longform|short|live [--baseline] [--cap 10] [--no-state]
10
+ *
11
+ * Rating model (no positive rating — keeping the channel is the implicit positive):
12
+ * 1 = neutral → bump the state pointer only (video seen, no signal), profile untouched.
13
+ * 0 = worthless → append a negative few-shot to `## Skip titles` (FIFO cap, default 10).
14
+ * comment → append a durable rule to `## Notes`, seen by both filters.
15
+ *
16
+ * Direct durable commit — no rolling buffer, no consolidation. Idempotent: identical
17
+ * bullets are de-duplicated; a state.md re-bump is a no-op.
18
+ */
19
+ import { readFileSync, writeFileSync, existsSync } from 'fs';
20
+ import dotenv from 'dotenv';
21
+ import { parseChannels, appendSkipTitle, appendNote, bumpStatePointer } from "./lib/yt-lib.js";
22
+ import { CHANNELS_MD, STATE_MD, PENDING_FILE, ENV_PATH, profilePath } from "./lib/paths.js";
23
+ dotenv.config({ path: ENV_PATH });
24
/**
 * Look up the value that follows a flag on the command line.
 *
 * @param {string[]} args - Argument vector to search.
 * @param {string} name - Flag to look for, e.g. `--rating`.
 * @returns {string|null} The element right after the first occurrence of `name`,
 *   or null when the flag is absent or is the last element.
 */
function getArg(args, name) {
    const pos = args.indexOf(name);
    if (pos === -1) {
        return null;
    }
    const value = args[pos + 1];
    return value === undefined ? null : value;
}
28
/**
 * Read the pending-video metadata that supplies defaults for the CLI flags, so the
 * caller only needs to pass --rating (+ optional --comment). Explicit flags still override.
 *
 * Never throws: a missing, unreadable or corrupt PENDING_FILE yields `{}`. JSON that
 * parses to something other than a plain object (`null`, an array, a number, …) is also
 * treated as "no defaults", because the caller reads properties off the result.
 *
 * @returns {object} The parsed pending record, or `{}` when none is usable.
 */
function loadPending() {
    try {
        const parsed = JSON.parse(readFileSync(PENDING_FILE, 'utf8'));
        const isRecord = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
        return isRecord ? parsed : {};
    }
    catch {
        // Missing file (ENOENT) and malformed JSON both mean "no defaults".
        return {};
    }
}
42
/**
 * Resolve the rating command's inputs from the command line, falling back to the
 * pending record (see loadPending) for channel / id / title / type / baseline.
 *
 * Prints a message to stderr and exits with code 1 when a required value is missing
 * or when --type, --rating or --cap is invalid.
 *
 * @param {string[]} argv - Command-line arguments (without the runtime and script path).
 * @returns {{channel: string, id: string, title: string, type: string, rating: number,
 *   comment: string, baseline: boolean, cap: number, noState: boolean}}
 */
function parseArgs(argv) {
    const pending = loadPending();
    const channel = getArg(argv, '--channel') ?? pending.channel ?? null;
    const id = getArg(argv, '--id') ?? pending.videoId ?? null;
    const title = getArg(argv, '--title') ?? pending.title ?? null;
    const type = getArg(argv, '--type') ?? pending.type ?? null;
    const ratingRaw = getArg(argv, '--rating');
    const comment = getArg(argv, '--comment') ?? '';
    const baseline = argv.includes('--baseline') || pending.is_baseline === true;
    const noState = argv.includes('--no-state');
    const capRaw = getArg(argv, '--cap');
    if (!channel || !id || !title || !type || !ratingRaw) {
        console.error('Usage: yt-briefing rate --rating 0|1 [--comment "..."] (channel/id/title/type default to .cache/pending.json; override with --channel @X --id Y --title "..." --type longform|short|live) [--baseline] [--cap 10] [--no-state]');
        process.exit(1);
    }
    if (!['longform', 'short', 'live'].includes(type)) {
        console.error(`Invalid --type: ${type}`);
        process.exit(1);
    }
    const rating = parseInt(ratingRaw, 10);
    // Permissive 0..5 so older profiles / scripts keep working; the live UI emits only 0|1.
    if (!Number.isFinite(rating) || rating < 0 || rating > 5) {
        console.error(`Invalid --rating: ${ratingRaw} (must be 0 or 1)`);
        process.exit(1);
    }
    const cap = capRaw ? parseInt(capRaw, 10) : 10;
    // Reject an unparseable or negative cap up front so NaN never reaches the
    // skip-title writer (e.g. `--cap abc`).
    if (!Number.isFinite(cap) || cap < 0) {
        console.error(`Invalid --cap: ${capRaw} (must be a non-negative integer)`);
        process.exit(1);
    }
    return { channel, id, title, type, rating, comment, baseline, cap, noState };
}
70
// Resolve the target channel and its profile; both must exist before anything is written.
const args = parseArgs(process.argv.slice(2));
const ch = parseChannels(readFileSync(CHANNELS_MD, 'utf8'))
    .find(entry => entry.handle === args.channel);
if (!ch) {
    console.error(`Channel ${args.channel} not found in channels.md`);
    process.exit(1);
}
const profile = profilePath(ch.slug);
if (!existsSync(profile)) {
    console.error(`Profile not found: ${profile}`);
    process.exit(1);
}
const date = new Date().toISOString().slice(0, 10);

// 1. Profile writes: rating 0 appends a skip title, a non-blank comment appends a note.
//    The file is rewritten only when its content actually changed.
const profileBefore = readFileSync(profile, 'utf8');
const trimmedComment = args.comment ? args.comment.trim() : '';
let profileAfter = args.rating === 0
    ? appendSkipTitle(profileBefore, { title: args.title, type: args.type }, args.cap)
    : profileBefore;
if (trimmedComment) {
    profileAfter = appendNote(profileAfter, trimmedComment);
}
if (profileAfter !== profileBefore) {
    writeFileSync(profile, profileAfter, 'utf8');
}

// 2. State pointer bump (skipped with --no-state). Reports whether state.md changed.
function bumpPointerIfNeeded() {
    if (args.noState) {
        return false;
    }
    const stateBefore = readFileSync(STATE_MD, 'utf8');
    const stateAfter = bumpStatePointer(stateBefore, args.channel, args.type, args.id, date);
    if (stateAfter === stateBefore) {
        return false;
    }
    writeFileSync(STATE_MD, stateAfter, 'utf8');
    return true;
}
const stateBumped = bumpPointerIfNeeded();

console.log(JSON.stringify({
    ok: true,
    profile: `channels/${ch.slug}.md`,
    state_bumped: stateBumped,
}));
@@ -0,0 +1,546 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * yt-sweep.ts — lazy briefing engine. ONE invocation advances to the next video that
4
+ * needs a rating (or reports done / rate_limited). All control flow, gate logic,
5
+ * transcript fetch, and LLM calls (title-filter classification + content-filter summary)
6
+ * live here. The frontend (skill or CLI) only renders the summary and collects the
7
+ * rating — zero loop logic outside this file, no subprocess LLM.
8
+ *
9
+ * Usage:
10
+ * bun src/yt-sweep.ts [--reset]
11
+ * bun src/yt-sweep.ts --prefetch <videoId> (internal: detached next-video warmup)
12
+ * bun src/yt-sweep.ts --fill (internal: detached queue builder)
13
+ *
14
+ * Output (stdout, single JSON line):
15
+ * {"status":"rating_needed","summary":"<md>","pending":{channel,videoId,title,type,publishedAt,is_baseline}}
16
+ * {"status":"done"}
17
+ * {"status":"rate_limited"}
18
+ *
19
+ * The engine ONLY writes files under DATA_DIR — it never runs git or any VCS. If you
20
+ * want your briefing state versioned, commit DATA_DIR yourself (or point YT_DATA_DIR at
21
+ * a synced folder). Keeping persistence out of the engine is deliberate: it stays a pure
22
+ * data tool with zero host coupling.
23
+ *
24
+ * State model (no module-global cache — each call is a fresh process):
25
+ * - <DATA_DIR>/state.md durable source of truth for what's been rated/skipped.
26
+ * ONLY the foreground process writes it.
27
+ * - <DATA_DIR>/.cache/queue.json lazy per-session queue: `channels_todo` (not yet
28
+ * expanded) + `items` (expanded, kept videos awaiting a
29
+ * rating) + `seen`. Foreground is the SOLE writer.
30
+ * Tagged with built_at; auto-rebuilt on a new day or via --reset.
31
+ * - <DATA_DIR>/.cache/queue-rest.json background-fill handoff written by the --fill child.
32
+ * - <DATA_DIR>/.cache/pending.json current ratable video's metadata for yt-rating.ts.
33
+ * - <DATA_DIR>/.cache/prefetch.json background-computed summary for the NEXT video.
34
+ *
35
+ * All .cache/ files are throwaway (rebuilt each run; safe to delete / gitignore).
36
+ *
37
+ * Lazy build + background fill (fast first paint): the first call lists the channels
38
+ * (cheap) and spawns a detached `--fill` child that expands EVERY channel in parallel
39
+ * into queue-rest.json. Meanwhile the foreground expands just enough channels to emit
40
+ * the FIRST ratable video. Concurrency is safe: the foreground solely owns queue.json +
41
+ * state.md; the --fill child only writes queue-rest.json and serializes its title-skips.
42
+ *
43
+ * Prefetch: after emitting `rating_needed`, the engine spawns a detached
44
+ * `--prefetch <nextVideoId>` child that fetches + summarizes the next video WHILE the
45
+ * user rates the current one, caching it in prefetch.json.
46
+ *
47
+ * Crash-safety: a crash before the rating leaves state.md unbumped and the queue head
48
+ * intact → the next call re-derives and reprocesses just that one video.
49
+ */
50
+ import { readFileSync, writeFileSync, existsSync, rmSync, mkdirSync, renameSync, appendFileSync } from 'node:fs';
51
+ import { spawn } from 'node:child_process';
52
+ import dotenv from 'dotenv';
53
+ import { parseChannels, parseState, bumpStatePointer } from "./lib/yt-lib.js";
54
+ import { chat, getModel } from "./lib/llm.js";
55
+ import { outputLang } from "./lib/config.js";
56
+ import { PKG_ROOT, ENV_PATH, CHANNELS_MD, STATE_MD, CACHE_DIR, QUEUE_FILE, REST_FILE, PENDING_FILE, PREFETCH_FILE, LOG_FILE, profilePath, script, } from "./lib/paths.js";
57
dotenv.config({ path: ENV_PATH });
mkdirSync(CACHE_DIR, { recursive: true });
// Sibling scripts are re-launched with the very runtime that started this process
// (bun/node/deno) rather than a hardcoded binary, so the tool runs wherever it was installed.
const RUNTIME = process.execPath;
// Upper bound on channels expanded at the same time, shared by the foreground
// first-paint waves and the background --fill. Cold channel expansion is network-bound,
// so running a wave concurrently overlaps the latencies instead of paying them in sequence.
const CONCURRENCY = 6;
const argv = process.argv.slice(2);
const reset = argv.includes('--reset');
const fillMode = argv.includes('--fill');
const pfIdx = argv.indexOf('--prefetch');
const prefetchTarget = pfIdx === -1 ? null : argv[pfIdx + 1];
const today = new Date().toISOString().slice(0, 10);
const LANG = outputLang();
// Diagnostics are off by default so stdout carries nothing but the JSON line. With
// YT_DEBUG set, timing lines and child stderr are appended to LOG_FILE.
const DEBUG = Boolean(process.env.YT_DEBUG);
const T0 = Date.now();
/** Append a timing line (milliseconds since process start) to LOG_FILE when DEBUG is on. */
function log(msg) {
    if (!DEBUG) {
        return;
    }
    appendFileSync(LOG_FILE, `⏱ ${msg} (+${Date.now() - T0}ms)\n`);
}
80
+ // ---------- subprocess helper ----------
81
/**
 * Run a command and capture its stdout.
 *
 * stdout is gathered as raw Buffers and decoded once at the end: decoding each chunk
 * separately would corrupt any multi-byte UTF-8 character that happens to be split
 * across two chunks.
 *
 * @param {string[]} cmd - Executable followed by its arguments.
 * @returns {Promise<{stdout: string, code: number}>} Resolves when the child closes;
 *   `code` is the exit code, or 1 when the child reported none. Rejects if the child
 *   emits 'error' (e.g. it could not be spawned).
 */
function run(cmd) {
    return new Promise((resolve, reject) => {
        const [exe, ...args] = cmd;
        const p = spawn(exe, args, { cwd: PKG_ROOT, env: { ...process.env } });
        const chunks = [];
        p.stdout.on('data', d => { chunks.push(d); });
        // Child stderr → the debug log only (never parent stderr / stdout), so a bare
        // invocation emits nothing but the JSON line. Silent unless YT_DEBUG.
        if (DEBUG)
            p.stderr.on('data', d => appendFileSync(LOG_FILE, d)); // raw bytes, no lossy decode
        else
            p.stderr.resume(); // drain so the child never blocks on a full pipe
        p.on('close', code => resolve({
            stdout: Buffer.concat(chunks).toString('utf8'),
            code: code ?? 1,
        }));
        p.on('error', reject);
    });
}
97
+ // ---------- state mutation (inline, crash-safe per write) ----------
98
/**
 * Move the state.md pointer for (handle, type) to `videoId`, dated `today`.
 * STATE_MD is rewritten only when bumpStatePointer actually changed the text.
 */
function bumpState(handle, type, videoId) {
    const current = readFileSync(STATE_MD, 'utf8');
    const updated = bumpStatePointer(current, handle, type, videoId, today);
    if (updated === current) {
        return;
    }
    writeFileSync(STATE_MD, updated, 'utf8');
}
104
/**
 * Record a skipped video. A 'no_transcript' skip leaves the pointer alone (the
 * transcript may show up later, so the video is retried on the next run); any other
 * status advances the pointer past the video.
 */
function persistSkip(item, status) {
    if (status === 'no_transcript') {
        return;
    }
    bumpState(item.channel, item.type, item.videoId);
}
112
/**
 * Write a batch of title-skips into state.md, bumping the pointer once per entry
 * (a title-skip is final). Meant to be called by the foreground process only.
 */
function applyTitleSkips(skips) {
    skips.forEach((entry) => {
        bumpState(entry.channel, entry.type, entry.videoId);
    });
}
120
/**
 * Append `add` to `queue.items`, leaving out every video whose id is already queued,
 * already resolved (`queue.seen`), or repeated earlier within `add` itself.
 */
function enqueue(queue, add) {
    const known = new Set(queue.items.map(({ videoId }) => videoId));
    for (const resolvedId of queue.seen) {
        known.add(resolvedId);
    }
    for (const candidate of add) {
        if (known.has(candidate.videoId)) {
            continue;
        }
        known.add(candidate.videoId);
        queue.items.push(candidate);
    }
}
129
/**
 * Remove the first queued item and remember its id in `queue.seen`, so a later
 * background merge will not add it back. Does nothing on an empty queue.
 */
function dropHead(queue) {
    const [head] = queue.items.splice(0, 1);
    if (head) {
        queue.seen.push(head.videoId);
    }
}
135
/**
 * Return the cached summary for `videoId`, or null when PREFETCH_FILE is missing,
 * unreadable, built on another day, or holds a different video.
 */
function loadPrefetch(videoId) {
    if (!existsSync(PREFETCH_FILE)) {
        return null;
    }
    try {
        const cached = JSON.parse(readFileSync(PREFETCH_FILE, 'utf8'));
        const isMatch = cached.videoId === videoId && cached.built_at === today;
        if (isMatch) {
            return cached.summary;
        }
    }
    catch {
        // A corrupt cache file is treated the same as no cache.
    }
    return null;
}
146
/**
 * Store the prefetch record by writing a pid-suffixed temp file and renaming it over
 * PREFETCH_FILE, so a concurrent reader never observes a half-written file.
 */
function writePrefetch(p) {
    const scratchPath = `${PREFETCH_FILE}.${process.pid}.tmp`;
    const payload = JSON.stringify(p);
    writeFileSync(scratchPath, payload);
    renameSync(scratchPath, PREFETCH_FILE);
}
152
/**
 * Delete the prefetch cache if present. `force: true` makes a missing file a no-op
 * without a separate existence check, so there is no window in which the file can
 * vanish between the check and the removal.
 */
function clearPrefetch() {
    rmSync(PREFETCH_FILE, { force: true });
}
156
/**
 * Spawn a detached child that warms the prefetch cache for `next` while the user rates
 * the current video. Best-effort: failures are silent, the foreground always falls back
 * to a live fetch. The child is detached and unref'd so it can outlive this process.
 *
 * @param {{videoId: string}|undefined} next - The item to warm; no-op when falsy.
 */
function spawnPrefetch(next) {
    if (!next)
        return;
    const child = spawn(RUNTIME, [script('yt-sweep'), '--prefetch', next.videoId], {
        cwd: PKG_ROOT,
        env: { ...process.env },
        detached: true,
        stdio: 'ignore',
    });
    // A failed spawn is reported through an 'error' event; with no listener attached
    // that event would be thrown and take this process down. Swallow it on purpose.
    child.on('error', () => { });
    child.unref();
}
172
+ // ---------- background queue fill (the rest of the channels, in parallel) ----------
173
/**
 * Load the background-fill handoff written by the --fill child.
 *
 * @returns {{built_at: string, items: object[], skips: object[]}|null} The handoff, or
 *   null when REST_FILE is missing, unreadable, built on another day, or does not have
 *   the expected shape. Both arrays are checked (mirroring loadQueue) because callers
 *   iterate them without further validation.
 */
function loadRest() {
    if (!existsSync(REST_FILE))
        return null;
    try {
        const r = JSON.parse(readFileSync(REST_FILE, 'utf8'));
        if (r.built_at !== today)
            return null;
        if (!Array.isArray(r.items) || !Array.isArray(r.skips))
            return null;
        return r;
    }
    catch {
        return null;
    }
}
184
/**
 * Store the fill handoff by writing a pid-suffixed temp file and renaming it over
 * REST_FILE, so the foreground never reads a half-written file.
 */
function writeRest(r) {
    const scratchPath = `${REST_FILE}.${process.pid}.tmp`;
    const payload = JSON.stringify(r);
    writeFileSync(scratchPath, payload);
    renameSync(scratchPath, REST_FILE);
}
190
/**
 * Delete the background-fill handoff if present. `force: true` makes a missing file a
 * no-op without a separate existence check, so a file that disappears between check
 * and removal cannot raise ENOENT.
 */
function clearRest() {
    rmSync(REST_FILE, { force: true });
}
194
/**
 * Spawn a detached `--fill` child that expands ALL channels in parallel while the
 * foreground emits the first video. Best-effort: if the child cannot start or dies,
 * the foreground expands channels itself.
 */
function spawnBackgroundFill() {
    const child = spawn(RUNTIME, [script('yt-sweep'), '--fill'], {
        cwd: PKG_ROOT,
        env: { ...process.env },
        detached: true,
        stdio: 'ignore',
    });
    // The foreground keeps running after this call, so an unhandled 'error' event from
    // a failed spawn would crash it. Swallow the event to keep the fill best-effort.
    child.on('error', () => { });
    child.unref();
}
208
/**
 * Print the result object as JSON on stdout and terminate the process with exit code 0.
 * Callers rely on this function never returning.
 */
function emit(obj) {
    log(`EXIT status=${obj.status}`);
    const line = JSON.stringify(obj);
    process.stdout.write(line);
    process.exit(0);
}
213
+ // ---------- LLM gates ----------
214
/**
 * Title filter: batch-classify a channel's non-baseline titles with the LLM.
 *
 * Keep-all fallbacks (an empty set is returned): the profile file is missing, it has no
 * `## Skip titles` heading, every video is a baseline, the LLM call throws, or its
 * reply contains no parseable JSON array.
 *
 * The reply is treated as untrusted: malformed entries are ignored one by one, and only
 * ids that were actually submitted for classification can be marked as skipped, so a
 * baseline video can never be skipped by this filter.
 *
 * @param {string} profilePathAbs - Absolute path of the channel profile.
 * @param {Array<{videoId: string, title: string, type: string, is_baseline: boolean}>} videos
 * @returns {Promise<Set<string>>} Ids of the videos to skip.
 */
async function runTitleFilter(profilePathAbs, videos) {
    const skip = new Set();
    if (!existsSync(profilePathAbs))
        return skip;
    const profile = readFileSync(profilePathAbs, 'utf8');
    if (!/##\s*Skip titles/.test(profile))
        return skip;
    const toClassify = videos.filter(v => !v.is_baseline);
    if (toClassify.length === 0)
        return skip;
    const classifiable = new Set(toClassify.map(v => v.videoId));
    const prompt = `Title-filter batch classification for a YouTube briefing tool.

Channel profile:
${profile}

Focus on the '## Skip titles' section (titles to skip) and any '## Notes' rules.
Keep by default — only skip a title that clearly matches the worthless pattern. If that section is missing or empty: classify all as keep.

Videos:
${JSON.stringify(toClassify.map(v => ({ id: v.videoId, title: v.title, type: v.type })))}

Output ONLY a raw JSON array (no markdown fences, no explanation):
[{"id":"VIDEO_ID","result":"keep"},{"id":"VIDEO_ID","result":"skip","reason":"max 12 words"},...]`;
    let out;
    try {
        out = await chat(prompt, {
            system: "You are a video title classifier. Output ONLY a raw JSON array as instructed. No markdown fences, no explanation.",
            model: getModel(),
            temperature: 0,
        });
    }
    catch {
        return skip;
    }
    try {
        // Tolerate stray text around the array by slicing from the first '[' to the last ']'.
        const start = out.indexOf('[');
        const end = out.lastIndexOf(']');
        if (start === -1 || end < start)
            return skip;
        const parsed = JSON.parse(out.slice(start, end + 1));
        if (!Array.isArray(parsed))
            return skip;
        for (const r of parsed) {
            if (r?.result === 'skip' && classifiable.has(r.id))
                skip.add(r.id);
        }
    }
    catch { /* keep-all */ }
    return skip;
}
261
/**
 * Content filter: ask the LLM for a substance check plus a summary in the configured
 * language. The channel profile is included in the prompt when its file exists.
 *
 * @param {object} item - Queue item (videoId, channel, title, type, publishedAt,
 *   is_baseline, profile_path).
 * @param {string} transcript - Transcript text of the video.
 * @returns {Promise<string>} The model's reply: summary markdown, or 'OFFTOPIC: <reason>'.
 */
async function runContentFilter(item, transcript) {
    let profile = '';
    if (existsSync(item.profile_path)) {
        profile = readFileSync(item.profile_path, 'utf8');
    }
    const profileSection = profile
        ? `\nChannel profile (sections to honor: Channel policy, Summary format, Cut sections, Episode types, Notes):\n${profile}\n`
        : '';
    const baselineNote = item.is_baseline ? ' · baseline' : '';
    const url = `https://youtube.com/watch?v=${item.videoId}`;
    const prompt = `Write a summary of this YouTube video in ${LANG}, OR return 'OFFTOPIC: <reason>' if the transcript clearly does not match what the title/channel promises.

Video:
- videoId: ${item.videoId}
- channel: ${item.channel}
- title: ${item.title}
- type: ${item.type}
- publishedAt: ${item.publishedAt}
- url: ${url}

Transcript:
${transcript}
${profileSection}
Steps:
1. Substance check: does the transcript actually deliver what the title promises? If clearly not → output ONLY 'OFFTOPIC: <short reason>' and stop.
2. Otherwise write the summary:
- Header: ### ${item.channel} — "${item.title}"
- Subtitle: _${item.publishedAt} · ${item.type} · ${url}${baselineNote}_
- 2-5 numbered thematic sections × 2-5 sentences each
- At most 5-8 short quotes from the transcript
- No timestamps
3. Language: natural ${LANG}. Avoid calques/anglicisms; use foreign words only for proper nouns or established technical terms. Section headers should be verb phrases, not noun stacks.
4. Output: ONLY the summary OR 'OFFTOPIC: ...'. No preamble, no trailing commentary.`;
    const options = {
        system: `You are a video summarizer writing in ${LANG}. Follow the task instructions exactly. Output only the summary or 'OFFTOPIC: <reason>'. No preamble, no commentary.`,
        model: getModel(),
    };
    const reply = await chat(prompt, options);
    return reply;
}
296
+ // ---------- queue build (lazy: list channels now, expand on demand) ----------
297
/** Read channels.md and return one ref per channel: its handle plus its profile path. */
function channelRefs() {
    const parsed = parseChannels(readFileSync(CHANNELS_MD, 'utf8'));
    return parsed.map((channel) => {
        const profile_path = profilePath(channel.slug);
        return { handle: channel.handle, profile_path };
    });
}
302
/**
 * Expand one channel: list its pending videos via the `yt-channel-pending` script, then
 * run the title filter over them. Writes nothing itself — the kept items and the
 * title-skips are returned for the caller to persist. Used by both the foreground
 * expansion and the background --fill child.
 *
 * @param {{handle: string, profile_path: string}} ref - Channel to expand.
 * @returns {Promise<{items: object[], skips: object[]}>}
 */
async function expandChannel(ref) {
    const startedAt = Date.now();
    const listing = await run([RUNTIME, script('yt-channel-pending'), ref.handle]);
    log(` pending ${ref.handle} ${Date.now() - startedAt}ms`);
    // A non-zero exit is treated as "no pending videos".
    let videos = [];
    if (listing.code === 0) {
        videos = JSON.parse(listing.stdout || '[]');
    }
    if (videos.length === 0) {
        return { items: [], skips: [] };
    }
    const candidates = videos.map(({ videoId, title, type, publishedAt, is_baseline }) => ({
        channel: ref.handle,
        profile_path: ref.profile_path,
        videoId,
        title,
        type,
        publishedAt,
        is_baseline,
    }));
    const titleSkip = await runTitleFilter(ref.profile_path, candidates);
    const items = candidates.filter(candidate => !titleSkip.has(candidate.videoId));
    const skips = candidates
        .filter(candidate => titleSkip.has(candidate.videoId))
        .map(({ channel, type, videoId }) => ({ channel, type, videoId }));
    return { items, skips };
}
330
/**
 * Start a fresh lazy queue: drop leftover prefetch / fill files, list every channel as
 * "todo" with no items yet, persist that to QUEUE_FILE, and kick off the background
 * fill so the remaining work proceeds while the foreground emits the first video.
 *
 * @returns {{built_at: string, channels_todo: object[], items: object[], seen: string[]}}
 */
function buildQueue() {
    // Fresh run → anything cached for a previous queue is stale.
    clearPrefetch();
    clearRest();
    const queue = {
        built_at: today,
        channels_todo: channelRefs(),
        items: [],
        seen: [],
    };
    writeFileSync(QUEUE_FILE, JSON.stringify(queue));
    log(`queue init: ${queue.channels_todo.length} channels to expand`);
    spawnBackgroundFill();
    return queue;
}
345
/**
 * Fold a finished background fill into the queue: its title-skips go to state.md, its
 * items are appended (deduplicated), channels_todo is emptied, and the handoff file is
 * removed.
 *
 * @returns {boolean} true when a fill was merged, false when none was available.
 */
function mergeRest(queue) {
    const rest = loadRest();
    if (!rest) {
        return false;
    }
    const { skips, items } = rest;
    applyTitleSkips(skips);
    enqueue(queue, items);
    queue.channels_todo = [];
    clearRest();
    log(`merged background fill: +${items.length} items`);
    return true;
}
360
/**
 * Make sure the queue holds at least one item, or that no channel is left to expand.
 * The background fill is consumed first; while it is not ready, channels are expanded
 * here in the foreground so this never waits on the background child.
 */
async function ensureItems(queue) {
    const emptyExpansion = () => ({ items: [], skips: [] });
    mergeRest(queue);
    while (queue.channels_todo.length > 0 && queue.items.length === 0) {
        // Take up to CONCURRENCY channels and expand them together; a channel whose
        // expansion fails contributes nothing. Skips and items are applied here,
        // after the whole wave has settled.
        const wave = queue.channels_todo.splice(0, CONCURRENCY);
        const pendingExpansions = wave.map(ref => expandChannel(ref).catch(emptyExpansion));
        const expansions = await Promise.all(pendingExpansions);
        for (const expansion of expansions) {
            applyTitleSkips(expansion.skips);
            enqueue(queue, expansion.items);
        }
        writeFileSync(QUEUE_FILE, JSON.stringify(queue));
        // The background fill may have completed during the wave.
        mergeRest(queue);
    }
}
382
/**
 * Load the persisted queue, but only when it was built today and has the expected
 * shape (`channels_todo` and `items` arrays). A missing `seen` array is filled in.
 *
 * @returns {object|null} The queue, or null when QUEUE_FILE is missing, unreadable,
 *   from another day, or malformed — the caller then builds a new one.
 */
function loadQueue() {
    if (!existsSync(QUEUE_FILE)) {
        return null;
    }
    try {
        const cached = JSON.parse(readFileSync(QUEUE_FILE, 'utf8'));
        // A queue from another day is stale: rebuilding picks up newly published videos.
        const isFresh = cached.built_at === today;
        const hasShape = Array.isArray(cached.channels_todo) && Array.isArray(cached.items);
        if (!isFresh || !hasShape) {
            return null;
        }
        if (!Array.isArray(cached.seen)) {
            cached.seen = []; // caches written without `seen`
        }
        return cached;
    }
    catch {
        return null;
    }
}
401
+ // ---------- advance (the lazy step) ----------
402
/**
 * Look up the state.md pointer matching this item's type: `last_longform_id` for
 * 'longform', `last_short_id` for 'short', `last_live_id` for anything else.
 * Returns null when state.md has no row for the item's channel.
 */
function statePointer(item) {
    const rows = parseState(readFileSync(STATE_MD, 'utf8'));
    const row = rows.find(candidate => candidate.handle === item.channel);
    if (!row) {
        return null;
    }
    if (item.type === 'longform') {
        return row.last_longform_id;
    }
    if (item.type === 'short') {
        return row.last_short_id;
    }
    return row.last_live_id;
}
410
/**
 * Process one queue item: fetch its transcript via the `yt-transcript` script, then run
 * the content filter. The outcome is only returned — this function does not touch
 * state.md, the queue or the pending file. Used by the foreground advance and by the
 * --prefetch child.
 *
 * @returns {Promise<{kind: 'rate_limited'} | {kind: 'skip', status: string} |
 *   {kind: 'ratable', summary: string}>}
 *   Transcript exit code 2 → rate_limited; any other non-zero code → skip/no_transcript;
 *   blank transcript or an 'OFFTOPIC:' reply → skip/content_skip; otherwise ratable.
 */
async function processItem(item) {
    const { videoId } = item;
    const skipped = status => ({ kind: 'skip', status });

    const fetchStart = Date.now();
    const transcript = await run([RUNTIME, script('yt-transcript'), videoId, '--lang', 'auto']);
    log(`transcript ${videoId} ${Date.now() - fetchStart}ms (exit ${transcript.code})`);
    if (transcript.code === 2) {
        return { kind: 'rate_limited' };
    }
    if (transcript.code !== 0) {
        return skipped('no_transcript');
    }
    if (!transcript.stdout.trim()) {
        return skipped('content_skip');
    }

    const filterStart = Date.now();
    const summary = await runContentFilter(item, transcript.stdout);
    log(`content ${videoId} ${Date.now() - filterStart}ms`);
    return summary.startsWith('OFFTOPIC:')
        ? skipped('content_skip')
        : { kind: 'ratable', summary };
}
432
/**
 * The lazy step: walk the queue until a video needs a rating, the transcript source
 * rate-limits, or nothing is left. Every exit path goes through emit(), which
 * terminates the process — so code after an emit() call is never reached.
 */
async function advance(queue) {
    for (;;) {
        // Merge the background fill or expand channels until there is something to look
        // at. Still empty afterwards → nothing left anywhere.
        await ensureItems(queue);
        if (queue.items.length === 0) {
            break;
        }
        const item = queue.items[0];
        // The pointer already sits on the head: it was resolved in an earlier round.
        if (statePointer(item) === item.videoId) {
            dropHead(queue);
            continue;
        }
        // A warm prefetch replaces the live transcript fetch + content filter.
        const warmSummary = loadPrefetch(item.videoId);
        let outcome;
        if (warmSummary) {
            log(`prefetch hit ${item.videoId}`);
            outcome = { kind: 'ratable', summary: warmSummary };
        }
        else {
            outcome = await processItem(item);
        }
        if (outcome.kind === 'rate_limited') {
            writeFileSync(QUEUE_FILE, JSON.stringify(queue));
            emit({ status: 'rate_limited' });
        }
        if (outcome.kind === 'skip') {
            persistSkip(item, outcome.status);
            dropHead(queue);
            continue;
        }
        // Ratable: hand off to the frontend. The item stays at the queue head; once the
        // rating has bumped state, the next call sees pointer === videoId and drops it.
        const { channel, videoId, title, type, publishedAt, is_baseline } = item;
        const pending = { channel, videoId, title, type, publishedAt, is_baseline };
        writeFileSync(PENDING_FILE, JSON.stringify(pending));
        writeFileSync(QUEUE_FILE, JSON.stringify(queue));
        // Start warming the following video while the user rates this one.
        spawnPrefetch(queue.items[1]);
        emit({ status: 'rating_needed', summary: outcome.summary, pending });
    }
    // Queue exhausted: remove the per-session cache files and report completion.
    if (existsSync(QUEUE_FILE)) {
        rmSync(QUEUE_FILE);
    }
    clearRest();
    clearPrefetch();
    emit({ status: 'done' });
}
480
/**
 * --prefetch mode: compute the summary of one queued video and cache it in
 * PREFETCH_FILE. Always ends the process with exit code 0, whether or not a summary
 * was cached; it exits early when there is no usable queue, the video is not queued,
 * it is already resolved, or a warm cache entry already exists.
 */
async function runPrefetch(videoId) {
    const queue = loadQueue();
    if (!queue) {
        process.exit(0);
    }
    const target = queue.items.find(candidate => candidate.videoId === videoId);
    if (!target) {
        process.exit(0);
    }
    // Nothing to do when the video is already resolved or already warm.
    if (statePointer(target) === target.videoId || loadPrefetch(target.videoId)) {
        process.exit(0);
    }
    const outcome = await processItem(target);
    if (outcome.kind === 'ratable') {
        writePrefetch({ videoId: target.videoId, summary: outcome.summary, built_at: today });
    }
    process.exit(0);
}
502
/**
 * --fill mode: expand every channel with at most CONCURRENCY expansions in flight and
 * write the combined result to REST_FILE for the foreground to merge. A channel whose
 * expansion throws is skipped so it cannot sink the whole fill. Ends the process with
 * exit code 0; exits immediately when a fill for today already exists.
 */
async function runFill() {
    if (loadRest()) {
        process.exit(0); // already filled this run
    }
    const refs = channelRefs();
    const collected = { items: [], skips: [] };
    // Shared cursor: each worker claims the next unclaimed channel until none remain.
    let cursor = 0;
    const worker = async () => {
        while (cursor < refs.length) {
            const ref = refs[cursor];
            cursor += 1;
            try {
                const expanded = await expandChannel(ref);
                collected.items.push(...expanded.items);
                collected.skips.push(...expanded.skips);
            }
            catch { /* one channel down must not sink the fill */ }
        }
    };
    const workerCount = Math.min(CONCURRENCY, refs.length || 1);
    const workers = [];
    for (let i = 0; i < workerCount; i += 1) {
        workers.push(worker());
    }
    await Promise.all(workers);
    writeRest({ built_at: today, items: collected.items, skips: collected.skips });
    log(`fill done: ${collected.items.length} items, ${collected.skips.length} skips`);
    process.exit(0);
}
530
+ // ---------- entry ----------
531
// Background modes are selected by the PRESENCE of their flag, not by the truthiness of
// its value: `--prefetch` with a missing or empty id must not fall through into a full
// foreground sweep (which writes state.md / queue.json / pending.json).
if (pfIdx !== -1) {
    if (!prefetchTarget) {
        console.error('Usage: yt-sweep --prefetch <videoId>');
        process.exit(1);
    }
    await runPrefetch(prefetchTarget); // detached background warmup — never returns
}
if (fillMode) {
    await runFill(); // detached background queue builder — never returns
}
// --reset: discard every per-session cache file so the queue is rebuilt from scratch.
if (reset) {
    if (existsSync(QUEUE_FILE))
        rmSync(QUEUE_FILE);
    if (existsSync(PENDING_FILE))
        rmSync(PENDING_FILE);
    clearRest();
    clearPrefetch();
}
// Resume today's queue when one exists, otherwise start a new one, then take one step.
const queue = loadQueue() ?? buildQueue();
await advance(queue);