yt-briefing 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/yt/SKILL.md +54 -0
- package/LICENSE +21 -0
- package/README.md +106 -0
- package/data.example/.gitattributes +7 -0
- package/data.example/README.md +19 -0
- package/data.example/channels/_template.md +45 -0
- package/dist/bootstrap.js +243 -0
- package/dist/cli.js +29 -0
- package/dist/install-skill.js +51 -0
- package/dist/lib/config.js +23 -0
- package/dist/lib/llm.js +57 -0
- package/dist/lib/paths.js +56 -0
- package/dist/lib/prompt.js +39 -0
- package/dist/lib/skill-install.js +66 -0
- package/dist/lib/yt-api.js +122 -0
- package/dist/lib/yt-lib.js +157 -0
- package/dist/yt-channel-pending.js +85 -0
- package/dist/yt-channel-videos.js +43 -0
- package/dist/yt-rating.js +110 -0
- package/dist/yt-sweep.js +546 -0
- package/dist/yt-transcript.js +156 -0
- package/docs/sync-across-machines.md +127 -0
- package/docs/warp-proxy.md +81 -0
- package/package.json +56 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Usage:
|
|
4
|
+
* bun src/yt-rating.ts --rating 0|1 [--comment "..."]
|
|
5
|
+
*
|
|
6
|
+
* Channel / id / title / type default to <DATA_DIR>/.cache/pending.json (written by
|
|
7
|
+
* yt-sweep.ts) so the agent only passes --rating (+ optional --comment) — no fragile
|
|
8
|
+
* shell quoting of emoji/quote-laden titles. Explicit flags override:
|
|
9
|
+
* --channel @X --id Y --title "..." --type longform|short|live [--baseline] [--cap 10] [--no-state]
|
|
10
|
+
*
|
|
11
|
+
* Rating model (no positive rating — keeping the channel is the implicit positive):
|
|
12
|
+
* 1 = neutral → bump the state pointer only (video seen, no signal), profile untouched.
|
|
13
|
+
* 0 = worthless → append a negative few-shot to `## Skip titles` (FIFO cap, default 10).
|
|
14
|
+
* comment → append a durable rule to `## Notes`, seen by both filters.
|
|
15
|
+
*
|
|
16
|
+
* Direct durable commit — no rolling buffer, no consolidation. Idempotent: identical
|
|
17
|
+
* bullets are de-duplicated; a state.md re-bump is a no-op.
|
|
18
|
+
*/
|
|
19
|
+
import { readFileSync, writeFileSync, existsSync } from 'fs';
|
|
20
|
+
import dotenv from 'dotenv';
|
|
21
|
+
import { parseChannels, appendSkipTitle, appendNote, bumpStatePointer } from "./lib/yt-lib.js";
|
|
22
|
+
import { CHANNELS_MD, STATE_MD, PENDING_FILE, ENV_PATH, profilePath } from "./lib/paths.js";
|
|
23
|
+
dotenv.config({ path: ENV_PATH });
|
|
24
|
+
/**
 * Look up the value that follows a named flag in an argv-style list.
 *
 * @param {string[]} args - Command-line arguments.
 * @param {string} name - Flag to search for, e.g. '--rating'.
 * @returns {string|null} The element right after the first occurrence of
 *   `name`, or null when the flag is absent or is the last element.
 */
function getArg(args, name) {
    const flagPos = args.indexOf(name);
    if (flagPos === -1) {
        return null;
    }
    const value = args[flagPos + 1];
    return value === undefined ? null : value;
}
|
|
28
|
+
/**
 * Read the pending-video metadata used as defaults for --channel / --id /
 * --title / --type, so the caller only has to pass --rating (+ optional
 * --comment). Explicit flags still override these defaults.
 *
 * @returns {object} Parsed contents of PENDING_FILE, or an empty object when
 *   the file is missing, unreadable, not valid JSON, or does not hold a JSON
 *   object.
 */
function loadPending() {
    if (!existsSync(PENDING_FILE)) {
        return {};
    }
    let parsed;
    try {
        parsed = JSON.parse(readFileSync(PENDING_FILE, 'utf8'));
    }
    catch {
        return {};
    }
    // Valid JSON is not necessarily an object: `null`, arrays and primitives
    // would make the caller's `pending.channel` lookups throw or misbehave.
    const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isPlainObject ? parsed : {};
}
|
|
42
|
+
/**
 * Resolve the rating command's options from argv, falling back to the pending
 * video metadata for channel / id / title / type / baseline.
 *
 * Prints a message and exits with status 1 when a required value is missing
 * or a value is malformed.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{channel: string, id: string, title: string, type: string,
 *   rating: number, comment: string, baseline: boolean, cap: number,
 *   noState: boolean}} The validated options.
 */
function parseArgs(argv) {
    const pending = loadPending();
    const channel = getArg(argv, '--channel') ?? pending.channel ?? null;
    const id = getArg(argv, '--id') ?? pending.videoId ?? null;
    const title = getArg(argv, '--title') ?? pending.title ?? null;
    const type = getArg(argv, '--type') ?? pending.type ?? null;
    const ratingRaw = getArg(argv, '--rating');
    const comment = getArg(argv, '--comment') ?? '';
    const baseline = argv.includes('--baseline') || pending.is_baseline === true;
    const noState = argv.includes('--no-state');
    const capRaw = getArg(argv, '--cap');
    if (!channel || !id || !title || !type || !ratingRaw) {
        console.error('Usage: yt-briefing rate --rating 0|1 [--comment "..."] (channel/id/title/type default to .cache/pending.json; override with --channel @X --id Y --title "..." --type longform|short|live) [--baseline] [--cap 10] [--no-state]');
        process.exit(1);
    }
    if (!['longform', 'short', 'live'].includes(type)) {
        console.error(`Invalid --type: ${type}`);
        process.exit(1);
    }
    // Digits only: bare parseInt would read "0.9" as 0 (a "worthless" verdict)
    // and silently accept trailing junk such as "1abc".
    const wholeNumber = /^\d+$/;
    const ratingText = ratingRaw.trim();
    const rating = wholeNumber.test(ratingText) ? parseInt(ratingText, 10) : NaN;
    // Permissive 0..5 so older profiles / scripts keep working; the live UI emits only 0|1.
    if (!Number.isFinite(rating) || rating < 0 || rating > 5) {
        console.error(`Invalid --rating: ${ratingRaw} (must be 0 or 1)`);
        process.exit(1);
    }
    let cap = 10;
    if (capRaw) {
        const capText = capRaw.trim();
        if (!wholeNumber.test(capText)) {
            // Without this check a NaN cap would be handed to the skip-title writer.
            console.error(`Invalid --cap: ${capRaw} (must be a non-negative integer)`);
            process.exit(1);
        }
        cap = parseInt(capText, 10);
    }
    return { channel, id, title, type: type, rating, comment, baseline, cap, noState };
}
|
|
70
|
+
// Resolve the options and make sure the target channel and its profile exist.
const args = parseArgs(process.argv.slice(2));
const channels = parseChannels(readFileSync(CHANNELS_MD, 'utf8'));
const ch = channels.find(({ handle }) => handle === args.channel);
if (!ch) {
    console.error(`Channel ${args.channel} not found in channels.md`);
    process.exit(1);
}
const profile = profilePath(ch.slug);
if (!existsSync(profile)) {
    console.error(`Profile not found: ${profile}`);
    process.exit(1);
}
// Date stamp (YYYY-MM-DD, from the ISO/UTC timestamp) recorded with the state bump.
const date = new Date().toISOString().slice(0, 10);

// Step 1 — profile edits, written straight to the profile file:
//   rating 0        → the title is appended as a skip-title example
//   non-blank note  → the trimmed comment is appended as a note
//   rating 1 alone  → the profile is left untouched
const profileBefore = readFileSync(profile, 'utf8');
const trimmedComment = args.comment ? args.comment.trim() : '';
let profileAfter = profileBefore;
if (args.rating === 0) {
    profileAfter = appendSkipTitle(profileAfter, { title: args.title, type: args.type }, args.cap);
}
if (trimmedComment) {
    profileAfter = appendNote(profileAfter, trimmedComment);
}
if (profileAfter !== profileBefore) {
    // Only touch the file when one of the edits actually changed the text.
    writeFileSync(profile, profileAfter, 'utf8');
}

// Step 2 — move the state.md pointer onto this video, unless --no-state was given.
let stateBumped = false;
if (!args.noState) {
    const stateBefore = readFileSync(STATE_MD, 'utf8');
    const stateAfter = bumpStatePointer(stateBefore, args.channel, args.type, args.id, date);
    stateBumped = stateAfter !== stateBefore;
    if (stateBumped) {
        writeFileSync(STATE_MD, stateAfter, 'utf8');
    }
}

// Machine-readable result for the caller.
console.log(JSON.stringify({
    ok: true,
    profile: `channels/${ch.slug}.md`,
    state_bumped: stateBumped,
}));
|
package/dist/yt-sweep.js
ADDED
|
@@ -0,0 +1,546 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* yt-sweep.ts — lazy briefing engine. ONE invocation advances to the next video that
|
|
4
|
+
* needs a rating (or reports done / rate_limited). All control flow, gate logic,
|
|
5
|
+
* transcript fetch, and LLM calls (title-filter classification + content-filter summary)
|
|
6
|
+
* live here. The frontend (skill or CLI) only renders the summary and collects the
|
|
7
|
+
* rating — zero loop logic outside this file, no subprocess LLM.
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* bun src/yt-sweep.ts [--reset]
|
|
11
|
+
* bun src/yt-sweep.ts --prefetch <videoId> (internal: detached next-video warmup)
|
|
12
|
+
* bun src/yt-sweep.ts --fill (internal: detached queue builder)
|
|
13
|
+
*
|
|
14
|
+
* Output (stdout, single JSON line):
|
|
15
|
+
* {"status":"rating_needed","summary":"<md>","pending":{channel,videoId,title,type,publishedAt,is_baseline}}
|
|
16
|
+
* {"status":"done"}
|
|
17
|
+
* {"status":"rate_limited"}
|
|
18
|
+
*
|
|
19
|
+
* The engine ONLY writes files under DATA_DIR — it never runs git or any VCS. If you
|
|
20
|
+
* want your briefing state versioned, commit DATA_DIR yourself (or point YT_DATA_DIR at
|
|
21
|
+
* a synced folder). Keeping persistence out of the engine is deliberate: it stays a pure
|
|
22
|
+
* data tool with zero host coupling.
|
|
23
|
+
*
|
|
24
|
+
* State model (no module-global cache — each call is a fresh process):
|
|
25
|
+
* - <DATA_DIR>/state.md durable source of truth for what's been rated/skipped.
|
|
26
|
+
* ONLY the foreground process writes it.
|
|
27
|
+
* - <DATA_DIR>/.cache/queue.json lazy per-session queue: `channels_todo` (not yet
|
|
28
|
+
* expanded) + `items` (expanded, kept videos awaiting a
|
|
29
|
+
* rating) + `seen`. Foreground is the SOLE writer.
|
|
30
|
+
* Tagged with built_at; auto-rebuilt on a new day or via --reset.
|
|
31
|
+
* - <DATA_DIR>/.cache/queue-rest.json background-fill handoff written by the --fill child.
|
|
32
|
+
* - <DATA_DIR>/.cache/pending.json current ratable video's metadata for yt-rating.ts.
|
|
33
|
+
* - <DATA_DIR>/.cache/prefetch.json background-computed summary for the NEXT video.
|
|
34
|
+
*
|
|
35
|
+
* All .cache/ files are throwaway (rebuilt each run; safe to delete / gitignore).
|
|
36
|
+
*
|
|
37
|
+
* Lazy build + background fill (fast first paint): the first call lists the channels
|
|
38
|
+
* (cheap) and spawns a detached `--fill` child that expands EVERY channel in parallel
|
|
39
|
+
* into queue-rest.json. Meanwhile the foreground expands just enough channels to emit
|
|
40
|
+
* the FIRST ratable video. Concurrency is safe: the foreground solely owns queue.json +
|
|
41
|
+
* state.md; the --fill child only writes queue-rest.json and serializes its title-skips.
|
|
42
|
+
*
|
|
43
|
+
* Prefetch: after emitting `rating_needed`, the engine spawns a detached
|
|
44
|
+
* `--prefetch <nextVideoId>` child that fetches + summarizes the next video WHILE the
|
|
45
|
+
* user rates the current one, caching it in prefetch.json.
|
|
46
|
+
*
|
|
47
|
+
* Crash-safety: a crash before the rating leaves state.md unbumped and the queue head
|
|
48
|
+
* intact → the next call re-derives and reprocesses just that one video.
|
|
49
|
+
*/
|
|
50
|
+
import { readFileSync, writeFileSync, existsSync, rmSync, mkdirSync, renameSync, appendFileSync } from 'node:fs';
|
|
51
|
+
import { spawn } from 'node:child_process';
|
|
52
|
+
import dotenv from 'dotenv';
|
|
53
|
+
import { parseChannels, parseState, bumpStatePointer } from "./lib/yt-lib.js";
|
|
54
|
+
import { chat, getModel } from "./lib/llm.js";
|
|
55
|
+
import { outputLang } from "./lib/config.js";
|
|
56
|
+
import { PKG_ROOT, ENV_PATH, CHANNELS_MD, STATE_MD, CACHE_DIR, QUEUE_FILE, REST_FILE, PENDING_FILE, PREFETCH_FILE, LOG_FILE, profilePath, script, } from "./lib/paths.js";
|
|
57
|
+
// Load the .env file first, then make sure the cache directory exists.
dotenv.config({ path: ENV_PATH });
mkdirSync(CACHE_DIR, { recursive: true });

// Sibling scripts are re-launched with the very binary that is running this
// process (bun/node/deno), so no runtime name is ever hardcoded.
const RUNTIME = process.execPath;

// Upper bound on channels expanded at the same time, shared by the foreground
// first-paint waves and the background --fill. Expansion is network-bound, so
// overlapping the round trips beats paying them one after another.
const CONCURRENCY = 6;

// Command-line mode switches.
const argv = process.argv.slice(2);
const reset = argv.includes('--reset');
const fillMode = argv.includes('--fill');
const pfIdx = argv.indexOf('--prefetch');
const prefetchTarget = pfIdx === -1 ? null : argv[pfIdx + 1];

// Date part (YYYY-MM-DD) of the ISO timestamp; tags cache files and expires old ones.
const today = new Date().toISOString().substring(0, 10);
const LANG = outputLang();

// Diagnostics are off by default so stdout stays a single JSON line. With
// YT_DEBUG set, timings (and child stderr) are appended to LOG_FILE.
const DEBUG = Boolean(process.env.YT_DEBUG);
const T0 = Date.now();
const log = (msg) => {
    if (!DEBUG) {
        return;
    }
    appendFileSync(LOG_FILE, `⏱ ${msg} (+${Date.now() - T0}ms)\n`);
};
|
|
80
|
+
// ---------- subprocess helper ----------
|
|
81
|
+
/**
 * Run a subprocess in PKG_ROOT and collect everything it writes to stdout.
 *
 * @param {string[]} cmd - `[executable, ...args]`.
 * @returns {Promise<{stdout: string, code: number}>} Resolves when the child's
 *   streams close, with the collected stdout and the exit code (1 when the
 *   child reported no exit code, e.g. it was killed by a signal). Rejects if
 *   the child emits an 'error' event, e.g. it could not be spawned.
 */
function run(cmd) {
    return new Promise((resolve, reject) => {
        const [exe, ...args] = cmd;
        const p = spawn(exe, args, { cwd: PKG_ROOT, env: { ...process.env } });
        let stdout = '';
        // Decode at the stream level: a multi-byte UTF-8 character can be split
        // across two pipe chunks, and decoding each chunk on its own would turn
        // both halves into replacement characters.
        p.stdout.setEncoding('utf8');
        p.stdout.on('data', (chunk) => { stdout += chunk; });
        // Child stderr goes to the debug log only (never parent stderr / stdout),
        // so a bare invocation emits nothing but the JSON line.
        if (DEBUG) {
            // Append the raw bytes so split characters survive here as well.
            p.stderr.on('data', (d) => appendFileSync(LOG_FILE, d));
        }
        else {
            p.stderr.resume(); // drain so the child never blocks on a full pipe
        }
        p.on('close', (code) => resolve({ stdout, code: code ?? 1 }));
        p.on('error', reject);
    });
}
|
|
97
|
+
// ---------- state mutation (inline, crash-safe per write) ----------
|
|
98
|
+
/**
 * Move a channel's state.md pointer for the given video type onto `videoId`,
 * stamping it with today's date. The file is rewritten only when the text
 * actually changes.
 *
 * @param {string} handle - Channel handle.
 * @param {string} type - Video type passed through to bumpStatePointer.
 * @param {string} videoId - Video the pointer should land on.
 */
function bumpState(handle, type, videoId) {
    const current = readFileSync(STATE_MD, 'utf8');
    const updated = bumpStatePointer(current, handle, type, videoId, today);
    if (updated === current) {
        return; // nothing changed — leave the file alone
    }
    writeFileSync(STATE_MD, updated, 'utf8');
}
|
|
104
|
+
/**
 * Record that a queue item was skipped.
 *
 * A 'no_transcript' skip leaves the pointer where it is, so the video is
 * retried on a later run (the transcript may appear by then). Any other
 * status advances the pointer past the video.
 *
 * @param {{channel: string, type: string, videoId: string}} item - Skipped item.
 * @param {string} status - Skip reason.
 */
function persistSkip(item, status) {
    const retryLater = status === 'no_transcript';
    if (retryLater) {
        return;
    }
    bumpState(item.channel, item.type, item.videoId);
}
|
|
112
|
+
/**
 * Persist a batch of title-skips by advancing the state pointer for each one
 * (a title-skip is final, so the pointer always moves). Meant to be called by
 * the foreground process only.
 *
 * @param {Iterable<{channel: string, type: string, videoId: string}>} skips
 */
function applyTitleSkips(skips) {
    for (const { channel, type, videoId } of skips) {
        bumpState(channel, type, videoId);
    }
}
|
|
120
|
+
/**
 * Append new items to the queue in order, leaving out every videoId that is
 * already queued, already resolved (`queue.seen`), or repeated within `add`.
 *
 * @param {{items: Array<{videoId: string}>, seen: string[]}} queue - Mutated in place.
 * @param {Iterable<{videoId: string}>} add - Candidate items.
 */
function enqueue(queue, add) {
    const queuedIds = queue.items.map((entry) => entry.videoId);
    const known = new Set([...queuedIds, ...queue.seen]);
    for (const candidate of add) {
        if (known.has(candidate.videoId)) {
            continue;
        }
        queue.items.push(candidate);
        known.add(candidate.videoId);
    }
}
|
|
129
|
+
/**
 * Remove the first queue item and remember its videoId in `queue.seen`, so a
 * later merge of background results does not add it back. Does nothing when
 * the queue is empty.
 *
 * @param {{items: Array<{videoId: string}>, seen: string[]}} queue - Mutated in place.
 */
function dropHead(queue) {
    const head = queue.items.shift();
    if (!head) {
        return;
    }
    queue.seen.push(head.videoId);
}
|
|
135
|
+
/**
 * Return the cached summary for `videoId` if the prefetch file holds an entry
 * for exactly that video built today.
 *
 * @param {string} videoId - Video the caller is about to process.
 * @returns {*} The entry's `summary` value, or null when the file is missing,
 *   unreadable, for another video, or from another day.
 */
function loadPrefetch(videoId) {
    if (existsSync(PREFETCH_FILE)) {
        try {
            const entry = JSON.parse(readFileSync(PREFETCH_FILE, 'utf8'));
            if (entry.videoId === videoId && entry.built_at === today) {
                return entry.summary;
            }
        }
        catch {
            // A corrupt or partial cache is treated as a miss.
        }
    }
    return null;
}
|
|
146
|
+
/**
 * Store a prefetch entry by writing a per-process temp file next to
 * PREFETCH_FILE and renaming it into place, so a concurrent reader never sees
 * a half-written file.
 *
 * @param {object} p - Entry to serialize as JSON.
 */
function writePrefetch(p) {
    const staging = [PREFETCH_FILE, process.pid, 'tmp'].join('.');
    const payload = JSON.stringify(p);
    writeFileSync(staging, payload);
    renameSync(staging, PREFETCH_FILE);
}
|
|
152
|
+
/**
 * Delete the prefetch cache file if it exists.
 *
 * Uses a single forced removal instead of an exists-then-remove pair, so a
 * file that disappears between the check and the removal cannot make this
 * throw.
 */
function clearPrefetch() {
    rmSync(PREFETCH_FILE, { force: true });
}
|
|
156
|
+
/**
 * Start a detached `--prefetch <videoId>` child that warms the prefetch cache
 * for `next` while the user rates the current video. Best-effort: the child
 * is detached and unref'd so it can outlive this process, and a failure to
 * start it is only logged.
 *
 * @param {{videoId: string}|undefined} next - Item to warm; nothing happens
 *   when it is missing.
 */
function spawnPrefetch(next) {
    if (!next) {
        return;
    }
    const child = spawn(RUNTIME, [script('yt-sweep'), '--prefetch', next.videoId], {
        cwd: PKG_ROOT,
        env: { ...process.env },
        detached: true,
        stdio: 'ignore',
    });
    // Without a listener, a spawn failure surfaces as an unhandled 'error'
    // event and takes the caller down with it.
    child.on('error', (err) => log(`prefetch spawn failed: ${err.message}`));
    child.unref();
}
|
|
172
|
+
// ---------- background queue fill (the rest of the channels, in parallel) ----------
|
|
173
|
+
/**
 * Load the background-fill handoff file if it is usable.
 *
 * @returns {{built_at: string, items: Array, skips: Array}|null} The parsed
 *   handoff when it was built today and carries both an `items` and a `skips`
 *   array; null when the file is missing, unreadable, stale, or malformed.
 */
function loadRest() {
    if (!existsSync(REST_FILE)) {
        return null;
    }
    try {
        const r = JSON.parse(readFileSync(REST_FILE, 'utf8'));
        if (r.built_at !== today) {
            return null;
        }
        // Same shape guard as the queue loader: consumers iterate both arrays,
        // so a handoff without them must count as "not ready" rather than crash.
        if (!Array.isArray(r.items) || !Array.isArray(r.skips)) {
            return null;
        }
        return r;
    }
    catch {
        return null;
    }
}
|
|
184
|
+
/**
 * Store the background-fill handoff by writing a per-process temp file next
 * to REST_FILE and renaming it into place, so the reader never sees a
 * half-written file.
 *
 * @param {object} r - Handoff to serialize as JSON.
 */
function writeRest(r) {
    const staging = [REST_FILE, process.pid, 'tmp'].join('.');
    const payload = JSON.stringify(r);
    writeFileSync(staging, payload);
    renameSync(staging, REST_FILE);
}
|
|
190
|
+
/**
 * Delete the background-fill handoff file if it exists.
 *
 * Uses a single forced removal instead of an exists-then-remove pair, so a
 * file that disappears between the check and the removal cannot make this
 * throw.
 */
function clearRest() {
    rmSync(REST_FILE, { force: true });
}
|
|
194
|
+
/**
 * Start a detached `--fill` child that expands all channels while the
 * foreground works on the first video. Best-effort: the child is detached and
 * unref'd, and a failure to start it is only logged — the foreground can still
 * expand channels itself.
 */
function spawnBackgroundFill() {
    const child = spawn(RUNTIME, [script('yt-sweep'), '--fill'], {
        cwd: PKG_ROOT,
        env: { ...process.env },
        detached: true,
        stdio: 'ignore',
    });
    // The foreground keeps running after this call, so an unhandled 'error'
    // event from a failed spawn would crash it; log instead.
    child.on('error', (err) => log(`background fill spawn failed: ${err.message}`));
    child.unref();
}
|
|
208
|
+
/**
 * Write the final result as JSON (no trailing newline) to stdout and end the
 * process with status 0. Callers rely on this never returning.
 *
 * @param {{status: string}} obj - Result to report.
 */
function emit(obj) {
    const { status } = obj;
    log(`EXIT status=${status}`);
    const line = JSON.stringify(obj);
    process.stdout.write(line);
    process.exit(0);
}
|
|
213
|
+
// ---------- LLM gates ----------
|
|
214
|
+
/**
 * Title filter: ask the LLM to classify a channel's non-baseline titles as
 * keep/skip in one batch.
 *
 * Keep-all is the fallback: an empty set is returned when the profile file is
 * missing, has no "## Skip titles" heading, there is nothing to classify, the
 * LLM call fails, or its reply cannot be parsed.
 *
 * @param {string} profilePathAbs - Path of the channel profile file.
 * @param {Array<{videoId: string, title: string, type: string, is_baseline: boolean}>} videos
 * @returns {Promise<Set<string>>} videoIds to skip — always a subset of the
 *   ids that were sent for classification.
 */
async function runTitleFilter(profilePathAbs, videos) {
    const skip = new Set();
    if (!existsSync(profilePathAbs))
        return skip;
    const profile = readFileSync(profilePathAbs, 'utf8');
    if (!/##\s*Skip titles/.test(profile))
        return skip;
    const toClassify = videos.filter(v => !v.is_baseline);
    if (toClassify.length === 0)
        return skip;
    const prompt = `Title-filter batch classification for a YouTube briefing tool.

Channel profile:
${profile}

Focus on the '## Skip titles' section (titles to skip) and any '## Notes' rules.
Keep by default — only skip a title that clearly matches the worthless pattern. If that section is missing or empty: classify all as keep.

Videos:
${JSON.stringify(toClassify.map(v => ({ id: v.videoId, title: v.title, type: v.type })))}

Output ONLY a raw JSON array (no markdown fences, no explanation):
[{"id":"VIDEO_ID","result":"keep"},{"id":"VIDEO_ID","result":"skip","reason":"max 12 words"},...]`;
    let out;
    try {
        out = await chat(prompt, {
            system: "You are a video title classifier. Output ONLY a raw JSON array as instructed. No markdown fences, no explanation.",
            model: getModel(),
            temperature: 0,
        });
    }
    catch (err) {
        // Deliberately best-effort, but leave a trace for debugging.
        log(`title filter: LLM call failed, keeping all (${err?.message ?? err})`);
        return skip;
    }
    try {
        // Tolerate chatter around the array: parse from the first '[' to the last ']'.
        const start = out.indexOf('['), end = out.lastIndexOf(']');
        if (start === -1 || end === -1)
            return skip;
        const parsed = JSON.parse(out.slice(start, end + 1));
        // Only ids that were actually submitted may be skipped; anything else
        // in the reply (invented ids, baseline videos) is ignored.
        const classifiable = new Set(toClassify.map(v => v.videoId));
        for (const r of parsed)
            if (r?.result === 'skip' && classifiable.has(r.id))
                skip.add(r.id);
    }
    catch (err) {
        log(`title filter: unparseable reply, keeping the rest (${err?.message ?? err})`);
    }
    return skip;
}
|
|
261
|
+
/**
 * Content filter: ask the LLM either to summarize the video in the configured
 * output language or to answer 'OFFTOPIC: <reason>' when the transcript does
 * not deliver what the title promises.
 *
 * The channel profile text is added to the prompt when the profile file
 * exists and is non-empty.
 *
 * @param {{videoId: string, channel: string, title: string, type: string,
 *   publishedAt: string, is_baseline: boolean, profile_path: string}} item
 * @param {string} transcript - Transcript text of the video.
 * @returns {Promise<*>} Whatever `chat` resolves to (the prompt asks for a
 *   markdown summary or an 'OFFTOPIC: ...' line).
 */
async function runContentFilter(item, transcript) {
    const { videoId, channel, title, type, publishedAt } = item;
    let profile = '';
    if (existsSync(item.profile_path)) {
        profile = readFileSync(item.profile_path, 'utf8');
    }
    let profileSection = '';
    if (profile) {
        profileSection = `\nChannel profile (sections to honor: Channel policy, Summary format, Cut sections, Episode types, Notes):\n${profile}\n`;
    }
    let baselineNote = '';
    if (item.is_baseline) {
        baselineNote = ' · baseline';
    }
    const prompt = `Write a summary of this YouTube video in ${LANG}, OR return 'OFFTOPIC: <reason>' if the transcript clearly does not match what the title/channel promises.

Video:
- videoId: ${videoId}
- channel: ${channel}
- title: ${title}
- type: ${type}
- publishedAt: ${publishedAt}
- url: https://youtube.com/watch?v=${videoId}

Transcript:
${transcript}
${profileSection}
Steps:
1. Substance check: does the transcript actually deliver what the title promises? If clearly not → output ONLY 'OFFTOPIC: <short reason>' and stop.
2. Otherwise write the summary:
- Header: ### ${channel} — "${title}"
- Subtitle: _${publishedAt} · ${type} · https://youtube.com/watch?v=${videoId}${baselineNote}_
- 2-5 numbered thematic sections × 2-5 sentences each
- At most 5-8 short quotes from the transcript
- No timestamps
3. Language: natural ${LANG}. Avoid calques/anglicisms; use foreign words only for proper nouns or established technical terms. Section headers should be verb phrases, not noun stacks.
4. Output: ONLY the summary OR 'OFFTOPIC: ...'. No preamble, no trailing commentary.`;
    const options = {
        system: `You are a video summarizer writing in ${LANG}. Follow the task instructions exactly. Output only the summary or 'OFFTOPIC: <reason>'. No preamble, no commentary.`,
        model: getModel(),
    };
    return await chat(prompt, options);
}
|
|
296
|
+
// ---------- queue build (lazy: list channels now, expand on demand) ----------
|
|
297
|
+
/**
 * Read channels.md and turn each parsed channel into a lightweight reference.
 *
 * @returns {Array<{handle: string, profile_path: string}>} One entry per
 *   channel: its handle and the profile path derived from its slug.
 */
function channelRefs() {
    const channelsText = readFileSync(CHANNELS_MD, 'utf8');
    const parsedChannels = parseChannels(channelsText);
    return parsedChannels.map(({ handle, slug }) => ({
        handle,
        profile_path: profilePath(slug),
    }));
}
|
|
302
|
+
/**
 * Expand one channel: run the pending-videos script for it, then pass the
 * results through the title filter. Nothing is persisted here — the kept items
 * and the title-skips are returned for the caller to apply.
 *
 * @param {{handle: string, profile_path: string}} ref - Channel reference.
 * @returns {Promise<{items: object[], skips: Array<{channel: string, type: string, videoId: string}>}>}
 *   Kept queue items and title-skips, both in the order the script listed
 *   them. Both are empty when the script exits non-zero or lists no videos.
 */
async function expandChannel(ref) {
    const startedAt = Date.now();
    const pendingCmd = [RUNTIME, script('yt-channel-pending'), ref.handle];
    const { stdout, code } = await run(pendingCmd);
    log(` pending ${ref.handle} ${Date.now() - startedAt}ms`);
    let videos = [];
    if (code === 0) {
        videos = JSON.parse(stdout || '[]');
    }
    if (videos.length === 0) {
        return { items: [], skips: [] };
    }
    // Keep only the fields the queue needs, plus the channel context.
    const candidates = videos.map(({ videoId, title, type, publishedAt, is_baseline }) => ({
        channel: ref.handle,
        profile_path: ref.profile_path,
        videoId,
        title,
        type,
        publishedAt,
        is_baseline,
    }));
    const titleSkip = await runTitleFilter(ref.profile_path, candidates);
    const items = candidates.filter((candidate) => !titleSkip.has(candidate.videoId));
    const skips = candidates
        .filter((candidate) => titleSkip.has(candidate.videoId))
        .map(({ channel, type, videoId }) => ({ channel, type, videoId }));
    return { items, skips };
}
|
|
330
|
+
/**
 * Start a fresh lazy queue: drop leftover prefetch / handoff files, list every
 * channel as still-to-expand with no items yet, save that to QUEUE_FILE, and
 * kick off the background fill.
 *
 * @returns {{built_at: string, channels_todo: object[], items: object[], seen: string[]}}
 *   The new queue object.
 */
function buildQueue() {
    // A new run must not pick up caches produced for a previous queue.
    clearPrefetch();
    clearRest();
    const queue = {
        built_at: today,
        channels_todo: channelRefs(),
        items: [],
        seen: [],
    };
    writeFileSync(QUEUE_FILE, JSON.stringify(queue));
    log(`queue init: ${queue.channels_todo.length} channels to expand`);
    spawnBackgroundFill();
    return queue;
}
|
|
345
|
+
/**
 * Fold a finished background fill into the queue: persist its title-skips,
 * append its items (de-duplicated), mark every channel as expanded, and
 * delete the handoff file.
 *
 * @param {{channels_todo: object[], items: object[], seen: string[]}} queue - Mutated in place.
 * @returns {boolean} true when a handoff was merged, false when none was available.
 */
function mergeRest(queue) {
    const rest = loadRest();
    if (rest) {
        const { skips, items } = rest;
        applyTitleSkips(skips);
        enqueue(queue, items);
        queue.channels_todo = [];
        clearRest();
        log(`merged background fill: +${items.length} items`);
        return true;
    }
    return false;
}
|
|
360
|
+
/**
 * Make sure the queue has at least one item, or that no channel is left to
 * expand. A ready background fill is merged first; otherwise channels are
 * expanded here, so the caller never waits on the background child.
 *
 * @param {{channels_todo: object[], items: object[], seen: string[]}} queue - Mutated in place.
 * @returns {Promise<void>}
 */
async function ensureItems(queue) {
    // A channel that fails to expand contributes nothing instead of failing the wave.
    const expandSafely = async (ref) => {
        try {
            return await expandChannel(ref);
        }
        catch {
            return { items: [], skips: [] };
        }
    };
    mergeRest(queue);
    while (queue.items.length === 0 && queue.channels_todo.length > 0) {
        // Take up to CONCURRENCY channels and expand them together, so the wave
        // costs about one expansion of wall time rather than several in a row.
        const wave = queue.channels_todo.splice(0, CONCURRENCY);
        const results = await Promise.all(wave.map(expandSafely));
        // All writes happen here, after the whole wave has been gathered.
        for (const expanded of results) {
            applyTitleSkips(expanded.skips);
            enqueue(queue, expanded.items);
        }
        writeFileSync(QUEUE_FILE, JSON.stringify(queue));
        // The background fill may have finished while this wave was running.
        mergeRest(queue);
    }
}
|
|
382
|
+
/**
 * Load the saved queue when it can be resumed.
 *
 * @returns {{built_at: string, channels_todo: object[], items: object[], seen: string[]}|null}
 *   The queue when the file exists, parses, was built today, and has array
 *   `channels_todo` and `items` fields; otherwise null, which makes the caller
 *   build a fresh queue. A missing `seen` list is filled in as empty.
 */
function loadQueue() {
    if (!existsSync(QUEUE_FILE)) {
        return null;
    }
    try {
        const q = JSON.parse(readFileSync(QUEUE_FILE, 'utf8'));
        // A queue from another day is stale: rebuilding picks up new uploads.
        const builtToday = q.built_at === today;
        const hasQueueShape = Array.isArray(q.channels_todo) && Array.isArray(q.items);
        if (!builtToday || !hasQueueShape) {
            return null;
        }
        if (!Array.isArray(q.seen)) {
            q.seen = []; // same-day cache without the field
        }
        return q;
    }
    catch {
        return null;
    }
}
|
|
401
|
+
// ---------- advance (the lazy step) ----------
|
|
402
|
+
/**
 * Look up the state.md pointer that applies to this item's video type.
 *
 * @param {{channel: string, type: string}} item - Queue item.
 * @returns {*} The channel row's `last_longform_id` for 'longform',
 *   `last_short_id` for 'short', and `last_live_id` for any other type; null
 *   when state.md has no row for the channel.
 */
function statePointer(item) {
    const rows = parseState(readFileSync(STATE_MD, 'utf8'));
    const row = rows.find((candidate) => candidate.handle === item.channel);
    if (!row) {
        return null;
    }
    switch (item.type) {
        case 'longform':
            return row.last_longform_id;
        case 'short':
            return row.last_short_id;
        default:
            return row.last_live_id;
    }
}
|
|
410
|
+
/**
 * Pipeline for one queue item: fetch the transcript, then run the content
 * filter (substance check + summary). This function itself does not touch the
 * state, queue or pending files — the caller decides what to do with the
 * result.
 *
 * @param {{videoId: string}} item - Queue item to process.
 * @returns {Promise<{kind: 'rate_limited'} | {kind: 'skip', status: string} | {kind: 'ratable', summary: string}>}
 *   - transcript script exit code 2 → rate_limited
 *   - any other non-zero exit code  → skip / no_transcript
 *   - blank transcript              → skip / content_skip
 *   - off-topic verdict             → skip / content_skip
 *   - otherwise                     → ratable, with the summary unchanged
 */
async function processItem(item) {
    const tT = Date.now();
    const t = await run([RUNTIME, script('yt-transcript'), item.videoId, '--lang', 'auto']);
    log(`transcript ${item.videoId} ${Date.now() - tT}ms (exit ${t.code})`);
    if (t.code === 2)
        return { kind: 'rate_limited' };
    if (t.code !== 0)
        return { kind: 'skip', status: 'no_transcript' };
    if (!t.stdout.trim())
        return { kind: 'skip', status: 'content_skip' };
    const tC = Date.now();
    const summary = await runContentFilter(item, t.stdout);
    log(`content ${item.videoId} ${Date.now() - tC}ms`);
    // The prompt spells the marker inside quotes, so a reply may start with a
    // quote character or stray whitespace. Accept those forms too; otherwise an
    // off-topic verdict would be shown to the user as a summary to rate.
    const verdictStart = summary.trimStart();
    if (/^['"`]?OFFTOPIC:/.test(verdictStart))
        return { kind: 'skip', status: 'content_skip' };
    return { kind: 'ratable', summary };
}
|
|
432
|
+
/**
 * The lazy step: walk the queue until a video needs a rating, the transcript
 * source rate-limits us, or nothing is left. Always ends the process through
 * `emit` with one of the three statuses (rating_needed / rate_limited / done).
 *
 * @param {{channels_todo: object[], items: object[], seen: string[]}} queue - Mutated in place.
 * @returns {Promise<void>}
 */
async function advance(queue) {
    for (;;) {
        // Merge the background fill, or expand channels here, until there is an
        // item to look at. Still empty afterwards → nothing left anywhere.
        await ensureItems(queue);
        if (queue.items.length === 0) {
            break;
        }
        const head = queue.items[0];
        // The pointer already sits on this video, i.e. it was resolved earlier.
        if (statePointer(head) === head.videoId) {
            dropHead(queue);
            continue;
        }
        // A warm prefetch replaces the live transcript fetch + content filter.
        const warmSummary = loadPrefetch(head.videoId);
        let outcome;
        if (warmSummary) {
            log(`prefetch hit ${head.videoId}`);
            outcome = { kind: 'ratable', summary: warmSummary };
        }
        else {
            outcome = await processItem(head);
        }
        if (outcome.kind === 'rate_limited') {
            writeFileSync(QUEUE_FILE, JSON.stringify(queue));
            emit({ status: 'rate_limited' });
        }
        if (outcome.kind === 'skip') {
            persistSkip(head, outcome.status);
            dropHead(queue);
            continue;
        }
        // Ratable: hand it to the frontend. The item stays at the head; once the
        // rating has moved the pointer onto it, the next call drops it.
        const pending = {
            channel: head.channel,
            videoId: head.videoId,
            title: head.title,
            type: head.type,
            publishedAt: head.publishedAt,
            is_baseline: head.is_baseline,
        };
        writeFileSync(PENDING_FILE, JSON.stringify(pending));
        writeFileSync(QUEUE_FILE, JSON.stringify(queue));
        // Warm the following video while the user rates this one.
        spawnPrefetch(queue.items[1]);
        emit({ status: 'rating_needed', summary: outcome.summary, pending });
    }
    // Everything has been processed: remove the per-run cache files.
    if (existsSync(QUEUE_FILE)) {
        rmSync(QUEUE_FILE);
    }
    clearRest();
    clearPrefetch();
    emit({ status: 'done' });
}
|
|
480
|
+
/**
 * --prefetch mode: compute the summary for one queued video and cache it in
 * the prefetch file. Always ends the process with status 0; only a ratable
 * result is cached.
 *
 * Exits early without doing any work when there is no resumable queue, the
 * video is not in it, the state pointer already sits on it, or a warm prefetch
 * for it already exists.
 *
 * @param {string} videoId - Video to warm.
 * @returns {Promise<void>}
 */
async function runPrefetch(videoId) {
    const target = loadQueue()?.items.find((entry) => entry.videoId === videoId);
    const nothingToDo = !target
        || statePointer(target) === target.videoId // already resolved
        || Boolean(loadPrefetch(target.videoId)); // already warm
    if (nothingToDo) {
        process.exit(0);
    }
    const outcome = await processItem(target);
    if (outcome.kind === 'ratable') {
        writePrefetch({ videoId: target.videoId, summary: outcome.summary, built_at: today });
    }
    process.exit(0);
}
|
|
502
|
+
/**
 * --fill mode: expand every channel with a bounded pool of workers and write
 * the combined result to the handoff file for the foreground to merge. Always
 * ends the process with status 0. A channel whose expansion throws is left
 * out, so one failure does not sink the whole fill.
 *
 * @returns {Promise<void>}
 */
async function runFill() {
    if (loadRest()) {
        process.exit(0); // a handoff for today is already on disk
    }
    const refs = channelRefs();
    const allItems = [];
    const allSkips = [];
    // Shared cursor: each worker claims the next unclaimed channel.
    let cursor = 0;
    const drain = async () => {
        while (cursor < refs.length) {
            const ref = refs[cursor];
            cursor += 1;
            try {
                const { items, skips } = await expandChannel(ref);
                allItems.push(...items);
                allSkips.push(...skips);
            }
            catch {
                // Skip this channel and carry on with the rest.
            }
        }
    };
    const poolSize = Math.min(CONCURRENCY, refs.length || 1);
    const pool = [];
    for (let w = 0; w < poolSize; w += 1) {
        pool.push(drain());
    }
    await Promise.all(pool);
    writeRest({ built_at: today, items: allItems, skips: allSkips });
    log(`fill done: ${allItems.length} items, ${allSkips.length} skips`);
    process.exit(0);
}
|
|
530
|
+
// ---------- entry ----------
|
|
531
|
+
// Background modes: each of these ends the process itself and never returns.
if (prefetchTarget) {
    await runPrefetch(prefetchTarget);
}
if (fillMode) {
    await runFill();
}
// --reset: throw away every per-run cache file so the queue is rebuilt from scratch.
if (reset) {
    for (const stale of [QUEUE_FILE, PENDING_FILE]) {
        if (existsSync(stale)) {
            rmSync(stale);
        }
    }
    clearRest();
    clearPrefetch();
}
// Foreground: resume today's queue if there is one, otherwise build a new one,
// then advance to the next video that needs a rating.
const queue = loadQueue() ?? buildQueue();
await advance(queue);
|