yt-liked 0.2.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +203 -0
- package/bin/ytl.mjs +2 -0
- package/dist/channel-enrich.js +209 -0
- package/dist/chrome-cookies.js +130 -0
- package/dist/classify-setup.js +409 -0
- package/dist/cli.js +625 -0
- package/dist/config.js +41 -0
- package/dist/db.js +28 -0
- package/dist/gemini-classify.js +424 -0
- package/dist/jsonl.js +22 -0
- package/dist/paths.js +26 -0
- package/dist/report.js +24 -0
- package/dist/types.js +1 -0
- package/dist/videos-db.js +534 -0
- package/dist/videos-import.js +122 -0
- package/dist/videos-viz.js +140 -0
- package/dist/youtube-web.js +217 -0
- package/package.json +47 -0
package/dist/cli.js
ADDED
|
@@ -0,0 +1,625 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { pathToFileURL } from 'node:url';
import { Command } from 'commander';
import { backfillStatePath, dataDir, defaultChromeUserDataDir, ensureDataDir, videosDbPath, videosJsonlPath, videosMetaPath, } from './paths.js';
import { extractChromeYoutubeCookies } from './chrome-cookies.js';
import { classifyCategories, classifyDomains, GEMINI_DEFAULT_MODEL } from './gemini-classify.js';
import { resolveClassifySetup } from './classify-setup.js';
import { geminiEnvLocalPath, loadEnv } from './config.js';
import { importVideoArchive } from './videos-import.js';
import { getVideoByLookupKey, getVideoStatusView, getVideoVizView, listVideos, requireVideoData, searchVideos } from './videos-db.js';
import { probeLikesHistory } from './youtube-web.js';
import { readProbeReport, saveProbeReport } from './report.js';
import { renderVideoViz } from './videos-viz.js';
import { enrichChannels } from './channel-enrich.js';
|
|
14
|
+
const R = '\x1b[38;5;196m';
|
|
15
|
+
const R2 = '\x1b[38;5;203m';
|
|
16
|
+
const W = '\x1b[97m';
|
|
17
|
+
const D = '\x1b[2m';
|
|
18
|
+
const X = '\x1b[0m';
|
|
19
|
+
const LOGO = `
|
|
20
|
+
${R}┌──────────────────────────────┐${X}
|
|
21
|
+
${R}│${X} ${W}Y T${X} ${R2}L i k e d${X} ${R}│${X}
|
|
22
|
+
${R}│${X} ${D}youtube-first archive CLI${X} ${R}│${X}
|
|
23
|
+
${R}└──────────────────────────────┘${X}`;
|
|
24
|
+
function stringifyDate(value) {
|
|
25
|
+
return value?.slice(0, 10) ?? '?';
|
|
26
|
+
}
|
|
27
|
+
function showIntro() {
|
|
28
|
+
console.log(`
|
|
29
|
+
Import, search, classify, and inspect a YouTube likes archive with
|
|
30
|
+
Gemini, Claude, or Codex, while keeping the browser-session probe
|
|
31
|
+
around for future native sync work.
|
|
32
|
+
Your data stays on your machine.
|
|
33
|
+
`);
|
|
34
|
+
}
|
|
35
|
+
function showClassifyPlan(plan) {
|
|
36
|
+
console.log(`
|
|
37
|
+
Launch plan:
|
|
38
|
+
Engine: ${plan.engine}
|
|
39
|
+
Profile: ${plan.profileLabel}
|
|
40
|
+
Model: ${plan.model ?? 'managed by local CLI'}
|
|
41
|
+
Batch size: ${plan.batchSize}
|
|
42
|
+
Workers: ${plan.concurrency}
|
|
43
|
+
|
|
44
|
+
Resume-safe:
|
|
45
|
+
Re-run the same command any time to continue filling missing labels.
|
|
46
|
+
`);
|
|
47
|
+
}
|
|
48
|
+
function safe(fn) {
|
|
49
|
+
return async (...args) => {
|
|
50
|
+
try {
|
|
51
|
+
await fn(...args);
|
|
52
|
+
}
|
|
53
|
+
catch (error) {
|
|
54
|
+
console.error(`\n Error: ${error.message}\n`);
|
|
55
|
+
process.exitCode = 1;
|
|
56
|
+
}
|
|
57
|
+
};
|
|
58
|
+
}
|
|
59
|
+
function showPaths() {
|
|
60
|
+
ensureDataDir();
|
|
61
|
+
console.log(`
|
|
62
|
+
Data directory:
|
|
63
|
+
${dataDir()}
|
|
64
|
+
|
|
65
|
+
Local files:
|
|
66
|
+
${videosJsonlPath()}
|
|
67
|
+
${videosDbPath()}
|
|
68
|
+
${videosMetaPath()}
|
|
69
|
+
${backfillStatePath()}
|
|
70
|
+
${geminiEnvLocalPath()}
|
|
71
|
+
`);
|
|
72
|
+
}
|
|
73
|
+
function renderRecordSummary(record) {
|
|
74
|
+
const tags = [record.primary_category, record.primary_domain].filter(Boolean).join(' · ');
|
|
75
|
+
const summary = record.description && record.description.length > 140
|
|
76
|
+
? `${record.description.slice(0, 137)}...`
|
|
77
|
+
: record.description;
|
|
78
|
+
console.log(`${record.id} ${record.channel_title ?? 'Unknown channel'} ${stringifyDate(record.liked_at)}${tags ? ` ${tags}` : ''}`);
|
|
79
|
+
console.log(` ${record.title}`);
|
|
80
|
+
if (summary)
|
|
81
|
+
console.log(` ${summary}`);
|
|
82
|
+
console.log(` ${record.url}`);
|
|
83
|
+
console.log();
|
|
84
|
+
}
|
|
85
|
+
function formatPct(count, total) {
|
|
86
|
+
if (total <= 0)
|
|
87
|
+
return '0.0%';
|
|
88
|
+
return `${((count / total) * 100).toFixed(1)}%`;
|
|
89
|
+
}
|
|
90
|
+
function renderCountRows(rows, total, indent = ' ') {
|
|
91
|
+
if (rows.length === 0) {
|
|
92
|
+
console.log(`${indent}none yet`);
|
|
93
|
+
return;
|
|
94
|
+
}
|
|
95
|
+
const labelWidth = Math.max(...rows.map((row) => row.label.length), 4);
|
|
96
|
+
const countWidth = Math.max(...rows.map((row) => row.count.toLocaleString().length), 1);
|
|
97
|
+
for (const row of rows) {
|
|
98
|
+
console.log(`${indent}${row.label.padEnd(labelWidth)} ${row.count.toLocaleString().padStart(countWidth)} (${formatPct(row.count, total)})`);
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
async function requireImportedData() {
|
|
102
|
+
const hasData = await requireVideoData();
|
|
103
|
+
if (!hasData) {
|
|
104
|
+
console.log(`
|
|
105
|
+
No local video archive imported yet.
|
|
106
|
+
|
|
107
|
+
Run:
|
|
108
|
+
ytl import /path/to/liked_videos.json
|
|
109
|
+
`);
|
|
110
|
+
process.exitCode = 1;
|
|
111
|
+
return false;
|
|
112
|
+
}
|
|
113
|
+
return true;
|
|
114
|
+
}
|
|
115
|
+
async function showStatus() {
|
|
116
|
+
const view = await getVideoStatusView();
|
|
117
|
+
const report = readProbeReport();
|
|
118
|
+
const geminiConfigured = Boolean(process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY);
|
|
119
|
+
console.log(LOGO);
|
|
120
|
+
showIntro();
|
|
121
|
+
console.log(`
|
|
122
|
+
Imported videos: ${view.importedCount.toLocaleString()}
|
|
123
|
+
Categorized: ${view.categorizedCount.toLocaleString()}
|
|
124
|
+
Domain-tagged: ${view.domainCount.toLocaleString()}
|
|
125
|
+
Last classification engine: ${view.lastClassificationEngine ?? 'never'}
|
|
126
|
+
Last classification model: ${view.lastClassificationModel ?? 'n/a'}
|
|
127
|
+
Gemini key: ${geminiConfigured ? 'configured' : 'not configured'}
|
|
128
|
+
`);
|
|
129
|
+
if (report) {
|
|
130
|
+
console.log(`
|
|
131
|
+
Last probe: ${report.generatedAt}
|
|
132
|
+
Probe discovered via web continuation: ${report.discoveredCount.toLocaleString()}
|
|
133
|
+
Existing comparison ceiling: ${report.baselineCeiling.toLocaleString()}
|
|
134
|
+
Probe stop reason: ${report.stopReason}
|
|
135
|
+
Probe passed feasibility gate: ${report.browserMethodBeatCeiling ? 'yes' : 'no'}
|
|
136
|
+
`);
|
|
137
|
+
if (report.statedVideoCount != null) {
|
|
138
|
+
console.log(` YouTube page header count: ${report.statedVideoCount.toLocaleString()}`);
|
|
139
|
+
}
|
|
140
|
+
if (report.alertMessages?.length) {
|
|
141
|
+
console.log(` Alerts: ${report.alertMessages.join(' | ')}`);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
console.log(`
|
|
145
|
+
Saved files:
|
|
146
|
+
${videosJsonlPath()}
|
|
147
|
+
${videosDbPath()}
|
|
148
|
+
${videosMetaPath()}
|
|
149
|
+
${backfillStatePath()}
|
|
150
|
+
${geminiEnvLocalPath()}
|
|
151
|
+
`);
|
|
152
|
+
}
|
|
153
|
+
async function showViz() {
|
|
154
|
+
if (!await requireImportedData())
|
|
155
|
+
return;
|
|
156
|
+
const view = await getVideoVizView();
|
|
157
|
+
console.log(renderVideoViz(view));
|
|
158
|
+
}
|
|
159
|
+
async function runSearchCommand(query, options) {
|
|
160
|
+
if (!await requireImportedData())
|
|
161
|
+
return;
|
|
162
|
+
const results = await searchVideos({
|
|
163
|
+
query,
|
|
164
|
+
channel: options.channel,
|
|
165
|
+
category: options.category,
|
|
166
|
+
domain: options.domain,
|
|
167
|
+
limit: options.limit,
|
|
168
|
+
});
|
|
169
|
+
if (options.json) {
|
|
170
|
+
console.log(JSON.stringify(results, null, 2));
|
|
171
|
+
return;
|
|
172
|
+
}
|
|
173
|
+
if (results.length === 0) {
|
|
174
|
+
console.log('\n No matching videos found.\n');
|
|
175
|
+
return;
|
|
176
|
+
}
|
|
177
|
+
for (const result of results) {
|
|
178
|
+
renderRecordSummary({
|
|
179
|
+
id: result.id,
|
|
180
|
+
title: result.title,
|
|
181
|
+
channel_title: result.channelTitle,
|
|
182
|
+
liked_at: result.likedAt,
|
|
183
|
+
primary_category: result.primaryCategory,
|
|
184
|
+
primary_domain: result.primaryDomain,
|
|
185
|
+
url: result.url,
|
|
186
|
+
description: result.description,
|
|
187
|
+
});
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
async function runListCommand(options) {
|
|
191
|
+
if (!await requireImportedData())
|
|
192
|
+
return;
|
|
193
|
+
const items = await listVideos({
|
|
194
|
+
query: options.query,
|
|
195
|
+
channel: options.channel,
|
|
196
|
+
category: options.category,
|
|
197
|
+
domain: options.domain,
|
|
198
|
+
privacy: options.privacy,
|
|
199
|
+
after: options.after,
|
|
200
|
+
before: options.before,
|
|
201
|
+
limit: options.limit,
|
|
202
|
+
offset: options.offset,
|
|
203
|
+
});
|
|
204
|
+
if (options.json) {
|
|
205
|
+
console.log(JSON.stringify(items, null, 2));
|
|
206
|
+
return;
|
|
207
|
+
}
|
|
208
|
+
if (items.length === 0) {
|
|
209
|
+
console.log('\n No videos matched those filters.\n');
|
|
210
|
+
return;
|
|
211
|
+
}
|
|
212
|
+
for (const item of items) {
|
|
213
|
+
renderRecordSummary(item);
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
async function runShowCommand(id, options) {
|
|
217
|
+
if (!await requireImportedData())
|
|
218
|
+
return;
|
|
219
|
+
const item = await getVideoByLookupKey(id);
|
|
220
|
+
if (!item) {
|
|
221
|
+
console.log(`\n Video not found: ${id}\n`);
|
|
222
|
+
console.log(' Tip: ytl show accepts the stored id, a YouTube video id, or the full URL.\n');
|
|
223
|
+
process.exitCode = 1;
|
|
224
|
+
return;
|
|
225
|
+
}
|
|
226
|
+
if (options.json) {
|
|
227
|
+
console.log(JSON.stringify(item, null, 2));
|
|
228
|
+
return;
|
|
229
|
+
}
|
|
230
|
+
console.log(`${item.id} · ${item.channel_title ?? 'Unknown channel'}`);
|
|
231
|
+
console.log(item.url);
|
|
232
|
+
console.log(`title: ${item.title}`);
|
|
233
|
+
if (item.description)
|
|
234
|
+
console.log(`description: ${item.description}`);
|
|
235
|
+
console.log(`liked_at: ${item.liked_at ?? 'unknown'}`);
|
|
236
|
+
if (item.video_published_at)
|
|
237
|
+
console.log(`video_published_at: ${item.video_published_at}`);
|
|
238
|
+
if (item.duration)
|
|
239
|
+
console.log(`duration: ${item.duration}`);
|
|
240
|
+
if (item.privacy_status)
|
|
241
|
+
console.log(`privacy: ${item.privacy_status}`);
|
|
242
|
+
if (item.categories?.length)
|
|
243
|
+
console.log(`categories: ${item.categories.join(', ')}`);
|
|
244
|
+
if (item.domains?.length)
|
|
245
|
+
console.log(`domains: ${item.domains.join(', ')}`);
|
|
246
|
+
if (item.classification_reason)
|
|
247
|
+
console.log(`reason: ${item.classification_reason}`);
|
|
248
|
+
if (item.classification_engine) {
|
|
249
|
+
console.log(`classified_by: ${item.classification_engine}${item.classification_model ? ` (${item.classification_model})` : ''}`);
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
async function runProbeCommand(options) {
|
|
253
|
+
console.log(LOGO);
|
|
254
|
+
showIntro();
|
|
255
|
+
console.log(`
|
|
256
|
+
Reading Chrome cookies from:
|
|
257
|
+
${options.chromeUserDataDir}/${options.chromeProfileDirectory}
|
|
258
|
+
`);
|
|
259
|
+
const cookies = extractChromeYoutubeCookies(options.chromeUserDataDir, options.chromeProfileDirectory);
|
|
260
|
+
const report = await probeLikesHistory({
|
|
261
|
+
cookieHeader: cookies.cookieHeader,
|
|
262
|
+
sapisid: cookies.sapisid,
|
|
263
|
+
maxPages: options.maxPages,
|
|
264
|
+
delayMs: options.delayMs,
|
|
265
|
+
maxMinutes: options.maxMinutes,
|
|
266
|
+
baselineCeiling: options.baselineCeiling,
|
|
267
|
+
});
|
|
268
|
+
report.chromeUserDataDir = options.chromeUserDataDir;
|
|
269
|
+
report.chromeProfileDirectory = options.chromeProfileDirectory;
|
|
270
|
+
saveProbeReport(report);
|
|
271
|
+
console.log(`
|
|
272
|
+
Probe complete.
|
|
273
|
+
|
|
274
|
+
Liked page title: ${report.pageTitle}
|
|
275
|
+
YouTube page header count: ${report.statedVideoCount ?? 'unknown'}
|
|
276
|
+
Discovered through direct web continuation: ${report.discoveredCount.toLocaleString()}
|
|
277
|
+
Existing comparison ceiling: ${report.baselineCeiling.toLocaleString()}
|
|
278
|
+
Stop reason: ${report.stopReason}
|
|
279
|
+
`);
|
|
280
|
+
if (!report.browserMethodBeatCeiling) {
|
|
281
|
+
console.log(`
|
|
282
|
+
Feasibility gate failed.
|
|
283
|
+
|
|
284
|
+
The browser-only continuation path still does not outperform the
|
|
285
|
+
existing archive ceiling on this machine, so the sync story remains
|
|
286
|
+
exploratory rather than production-ready.
|
|
287
|
+
`);
|
|
288
|
+
process.exitCode = 1;
|
|
289
|
+
return;
|
|
290
|
+
}
|
|
291
|
+
console.log(`
|
|
292
|
+
Feasibility gate passed.
|
|
293
|
+
`);
|
|
294
|
+
}
|
|
295
|
+
async function runImportCommand(filePath) {
|
|
296
|
+
console.log(LOGO);
|
|
297
|
+
showIntro();
|
|
298
|
+
console.log(`
|
|
299
|
+
Importing archive:
|
|
300
|
+
${filePath}
|
|
301
|
+
`);
|
|
302
|
+
const result = await importVideoArchive(filePath);
|
|
303
|
+
console.log(`
|
|
304
|
+
✓ ${result.imported.toLocaleString()} input videos imported
|
|
305
|
+
✓ ${result.total.toLocaleString()} total videos in local archive
|
|
306
|
+
✓ JSONL: ${videosJsonlPath()}
|
|
307
|
+
✓ SQLite: ${videosDbPath()}
|
|
308
|
+
`);
|
|
309
|
+
console.log(`
|
|
310
|
+
Next steps:
|
|
311
|
+
ytl enrich-channels
|
|
312
|
+
ytl classify
|
|
313
|
+
ytl classify-domains
|
|
314
|
+
ytl status
|
|
315
|
+
`);
|
|
316
|
+
}
|
|
317
|
+
async function runEnrichChannelsCommand(options) {
|
|
318
|
+
if (!await requireImportedData())
|
|
319
|
+
return;
|
|
320
|
+
console.log(LOGO);
|
|
321
|
+
showIntro();
|
|
322
|
+
console.log(`
|
|
323
|
+
Repairing uploader metadata from YouTube's public oEmbed endpoint.
|
|
324
|
+
This updates the imported channel fields in place so search, show,
|
|
325
|
+
and viz reflect each video's actual uploader rather than the likes
|
|
326
|
+
playlist owner.
|
|
327
|
+
`);
|
|
328
|
+
const started = Date.now();
|
|
329
|
+
process.stderr.write('Enriching channel metadata...\n');
|
|
330
|
+
const result = await enrichChannels({
|
|
331
|
+
limit: options.limit,
|
|
332
|
+
concurrency: options.concurrency ?? 8,
|
|
333
|
+
force: options.force,
|
|
334
|
+
onProgress: (done, total) => {
|
|
335
|
+
const pct = total > 0 ? Math.round((done / total) * 100) : 0;
|
|
336
|
+
const elapsed = Math.round((Date.now() - started) / 1000);
|
|
337
|
+
process.stderr.write(` Channels: ${done}/${total} (${pct}%) │ ${elapsed}s elapsed\n`);
|
|
338
|
+
},
|
|
339
|
+
});
|
|
340
|
+
console.log(`
|
|
341
|
+
Attempted: ${result.attempted.toLocaleString()}
|
|
342
|
+
Updated: ${result.updated.toLocaleString()}
|
|
343
|
+
Skipped: ${result.skipped.toLocaleString()}
|
|
344
|
+
Failed: ${result.failed.toLocaleString()}
|
|
345
|
+
${result.dominantFallbackTitle ? `Detected fallback importer signal: ${result.dominantFallbackTitle}${result.dominantFallbackId ? ` (${result.dominantFallbackId})` : ''}\n` : ''}Next:
|
|
346
|
+
ytl viz
|
|
347
|
+
ytl search "your query"
|
|
348
|
+
ytl show <video-id>
|
|
349
|
+
`);
|
|
350
|
+
}
|
|
351
|
+
async function runClassifyCommand(options) {
|
|
352
|
+
if (!await requireImportedData())
|
|
353
|
+
return;
|
|
354
|
+
console.log(LOGO);
|
|
355
|
+
showIntro();
|
|
356
|
+
const setup = await resolveClassifySetup({
|
|
357
|
+
engine: options.engine,
|
|
358
|
+
model: options.model,
|
|
359
|
+
batchSize: options.batchSize,
|
|
360
|
+
concurrency: options.concurrency,
|
|
361
|
+
limit: options.limit,
|
|
362
|
+
defaultGeminiModel: GEMINI_DEFAULT_MODEL,
|
|
363
|
+
defaultBatchSize: 50,
|
|
364
|
+
defaultConcurrency: 10,
|
|
365
|
+
});
|
|
366
|
+
if (!setup) {
|
|
367
|
+
return;
|
|
368
|
+
}
|
|
369
|
+
showClassifyPlan(setup);
|
|
370
|
+
const categoryStart = Date.now();
|
|
371
|
+
process.stderr.write(`Classifying categories with ${setup.engine === 'gemini' ? `Gemini (${setup.model}, batches of ${setup.batchSize})` : `${setup.engine} CLI (batches of ${setup.batchSize})`}...\n`);
|
|
372
|
+
const categories = await classifyCategories({
|
|
373
|
+
engine: setup.engine,
|
|
374
|
+
model: setup.model,
|
|
375
|
+
batchSize: setup.batchSize,
|
|
376
|
+
concurrency: setup.concurrency,
|
|
377
|
+
limit: setup.limit,
|
|
378
|
+
onBatch: (done, total) => {
|
|
379
|
+
const pct = total > 0 ? Math.round((done / total) * 100) : 0;
|
|
380
|
+
const elapsed = Math.round((Date.now() - categoryStart) / 1000);
|
|
381
|
+
process.stderr.write(` Categories: ${done}/${total} (${pct}%) │ ${elapsed}s elapsed\n`);
|
|
382
|
+
},
|
|
383
|
+
});
|
|
384
|
+
console.log(`
|
|
385
|
+
Engine: ${categories.engine}
|
|
386
|
+
${categories.model ? `Model: ${categories.model}\n` : ''}Categories: ${categories.classified}/${categories.totalPending} classified
|
|
387
|
+
`);
|
|
388
|
+
const domainStart = Date.now();
|
|
389
|
+
process.stderr.write(`Classifying domains with ${setup.engine === 'gemini' ? `Gemini (${setup.model}, batches of ${setup.batchSize})` : `${setup.engine} CLI (batches of ${setup.batchSize})`}...\n`);
|
|
390
|
+
const domains = await classifyDomains({
|
|
391
|
+
engine: setup.engine,
|
|
392
|
+
model: setup.model,
|
|
393
|
+
batchSize: setup.batchSize,
|
|
394
|
+
concurrency: setup.concurrency,
|
|
395
|
+
limit: setup.limit,
|
|
396
|
+
onBatch: (done, total) => {
|
|
397
|
+
const pct = total > 0 ? Math.round((done / total) * 100) : 0;
|
|
398
|
+
const elapsed = Math.round((Date.now() - domainStart) / 1000);
|
|
399
|
+
process.stderr.write(` Domains: ${done}/${total} (${pct}%) │ ${elapsed}s elapsed\n`);
|
|
400
|
+
},
|
|
401
|
+
});
|
|
402
|
+
console.log(`
|
|
403
|
+
Domains: ${domains.classified}/${domains.totalPending} classified
|
|
404
|
+
`);
|
|
405
|
+
}
|
|
406
|
+
async function runClassifyDomainsCommand(options) {
|
|
407
|
+
if (!await requireImportedData())
|
|
408
|
+
return;
|
|
409
|
+
console.log(LOGO);
|
|
410
|
+
showIntro();
|
|
411
|
+
const setup = await resolveClassifySetup({
|
|
412
|
+
engine: options.engine,
|
|
413
|
+
model: options.model,
|
|
414
|
+
batchSize: options.batchSize,
|
|
415
|
+
concurrency: options.concurrency,
|
|
416
|
+
limit: options.limit,
|
|
417
|
+
defaultGeminiModel: GEMINI_DEFAULT_MODEL,
|
|
418
|
+
defaultBatchSize: 50,
|
|
419
|
+
defaultConcurrency: 10,
|
|
420
|
+
});
|
|
421
|
+
if (!setup) {
|
|
422
|
+
return;
|
|
423
|
+
}
|
|
424
|
+
showClassifyPlan(setup);
|
|
425
|
+
const start = Date.now();
|
|
426
|
+
process.stderr.write(`Classifying domains with ${setup.engine === 'gemini' ? `Gemini (${setup.model}, batches of ${setup.batchSize})` : `${setup.engine} CLI (batches of ${setup.batchSize})`}...\n`);
|
|
427
|
+
const result = await classifyDomains({
|
|
428
|
+
engine: setup.engine,
|
|
429
|
+
all: options.all,
|
|
430
|
+
model: setup.model,
|
|
431
|
+
batchSize: setup.batchSize,
|
|
432
|
+
concurrency: setup.concurrency,
|
|
433
|
+
limit: setup.limit,
|
|
434
|
+
onBatch: (done, total) => {
|
|
435
|
+
const pct = total > 0 ? Math.round((done / total) * 100) : 0;
|
|
436
|
+
const elapsed = Math.round((Date.now() - start) / 1000);
|
|
437
|
+
process.stderr.write(` Domains: ${done}/${total} (${pct}%) │ ${elapsed}s elapsed\n`);
|
|
438
|
+
},
|
|
439
|
+
});
|
|
440
|
+
console.log(`
|
|
441
|
+
Engine: ${result.engine}
|
|
442
|
+
${result.model ? `Model: ${result.model}\n` : ''}Domains: ${result.classified}/${result.totalPending} classified
|
|
443
|
+
`);
|
|
444
|
+
}
|
|
445
|
+
export function buildCli() {
|
|
446
|
+
const program = new Command();
|
|
447
|
+
program
|
|
448
|
+
.name('ytl')
|
|
449
|
+
.description('Import, search, classify, and inspect your YouTube liked videos locally.')
|
|
450
|
+
.version('0.1.0')
|
|
451
|
+
.showHelpAfterError()
|
|
452
|
+
.action(() => {
|
|
453
|
+
console.log(LOGO);
|
|
454
|
+
showIntro();
|
|
455
|
+
showPaths();
|
|
456
|
+
console.log(`
|
|
457
|
+
Get started:
|
|
458
|
+
ytl import /path/to/liked_videos.json
|
|
459
|
+
ytl enrich-channels
|
|
460
|
+
ytl classify
|
|
461
|
+
ytl status
|
|
462
|
+
`);
|
|
463
|
+
});
|
|
464
|
+
program
|
|
465
|
+
.command('sync')
|
|
466
|
+
.description('Run the exploratory browser-session feasibility probe for YouTube Likes.')
|
|
467
|
+
.option('--full', 'Accepted for FT-style compatibility.', false)
|
|
468
|
+
.option('--classify', 'Accepted for FT-style compatibility.', false)
|
|
469
|
+
.option('--max-pages <n>', 'Maximum continuation pages to request.', '80')
|
|
470
|
+
.option('--delay-ms <ms>', 'Delay between continuation requests.', '0')
|
|
471
|
+
.option('--max-minutes <n>', 'Maximum runtime in minutes.', '10')
|
|
472
|
+
.option('--chrome-user-data-dir <path>', 'Chrome user data dir.', defaultChromeUserDataDir())
|
|
473
|
+
.option('--chrome-profile-directory <name>', 'Chrome profile directory.', 'Default')
|
|
474
|
+
.option('--baseline-ceiling <n>', 'Comparison ceiling that the browser method must beat.', '4953')
|
|
475
|
+
.action(safe(async (options) => {
|
|
476
|
+
await runProbeCommand({
|
|
477
|
+
maxPages: Number.parseInt(options.maxPages, 10),
|
|
478
|
+
delayMs: Number.parseInt(options.delayMs, 10),
|
|
479
|
+
maxMinutes: Number.parseInt(options.maxMinutes, 10),
|
|
480
|
+
chromeUserDataDir: options.chromeUserDataDir,
|
|
481
|
+
chromeProfileDirectory: options.chromeProfileDirectory,
|
|
482
|
+
baselineCeiling: Number.parseInt(options.baselineCeiling, 10),
|
|
483
|
+
});
|
|
484
|
+
}));
|
|
485
|
+
program
|
|
486
|
+
.command('import')
|
|
487
|
+
.description('Import a YouTube liked-videos JSON archive into the local ytl store.')
|
|
488
|
+
.argument('<path>', 'Path to a JSON archive like liked_videos.json')
|
|
489
|
+
.action(safe(async (filePath) => {
|
|
490
|
+
await runImportCommand(filePath);
|
|
491
|
+
}));
|
|
492
|
+
program
|
|
493
|
+
.command('classify')
|
|
494
|
+
.description('Classify imported videos by category and domain using Gemini, Claude, or Codex.')
|
|
495
|
+
.option('--engine <engine>', 'Engine: gemini, claude, or codex (omit to choose interactively)')
|
|
496
|
+
.option('--model <name>', 'Gemini model name', GEMINI_DEFAULT_MODEL)
|
|
497
|
+
.option('--batch-size <n>', 'Batch size', (v) => Number(v), 50)
|
|
498
|
+
.option('--concurrency <n>', 'Concurrent Gemini batches (Gemini only)', (v) => Number(v), 10)
|
|
499
|
+
.option('--limit <n>', 'Only classify the first N pending videos', (v) => Number(v))
|
|
500
|
+
.action(safe(async (options) => {
|
|
501
|
+
await runClassifyCommand({
|
|
502
|
+
engine: options.engine ? String(options.engine) : undefined,
|
|
503
|
+
model: options.model,
|
|
504
|
+
batchSize: Number(options.batchSize) || 50,
|
|
505
|
+
concurrency: Number(options.concurrency) || 10,
|
|
506
|
+
limit: typeof options.limit === 'number' && !Number.isNaN(options.limit) ? options.limit : undefined,
|
|
507
|
+
});
|
|
508
|
+
}));
|
|
509
|
+
program
|
|
510
|
+
.command('enrich-channels')
|
|
511
|
+
.description('Repair uploader channel metadata for imported videos.')
|
|
512
|
+
.option('--limit <n>', 'Only enrich the first N candidate videos', (v) => Number(v))
|
|
513
|
+
.option('--concurrency <n>', 'Concurrent oEmbed requests', (v) => Number(v), 8)
|
|
514
|
+
.option('--force', 'Re-check all imported videos, not just suspicious ones', false)
|
|
515
|
+
.action(safe(async (options) => {
|
|
516
|
+
await runEnrichChannelsCommand({
|
|
517
|
+
limit: typeof options.limit === 'number' && !Number.isNaN(options.limit) ? options.limit : undefined,
|
|
518
|
+
concurrency: Number(options.concurrency) || 8,
|
|
519
|
+
force: Boolean(options.force),
|
|
520
|
+
});
|
|
521
|
+
}));
|
|
522
|
+
program
|
|
523
|
+
.command('classify-domains')
|
|
524
|
+
.description('Classify imported videos by subject domain using Gemini, Claude, or Codex.')
|
|
525
|
+
.option('--all', 'Re-classify all videos, not just missing domains')
|
|
526
|
+
.option('--engine <engine>', 'Engine: gemini, claude, or codex (omit to choose interactively)')
|
|
527
|
+
.option('--model <name>', 'Gemini model name', GEMINI_DEFAULT_MODEL)
|
|
528
|
+
.option('--batch-size <n>', 'Batch size', (v) => Number(v), 50)
|
|
529
|
+
.option('--concurrency <n>', 'Concurrent Gemini batches (Gemini only)', (v) => Number(v), 10)
|
|
530
|
+
.option('--limit <n>', 'Only classify the first N pending videos', (v) => Number(v))
|
|
531
|
+
.action(safe(async (options) => {
|
|
532
|
+
await runClassifyDomainsCommand({
|
|
533
|
+
all: Boolean(options.all),
|
|
534
|
+
engine: options.engine ? String(options.engine) : undefined,
|
|
535
|
+
model: options.model,
|
|
536
|
+
batchSize: Number(options.batchSize) || 50,
|
|
537
|
+
concurrency: Number(options.concurrency) || 10,
|
|
538
|
+
limit: typeof options.limit === 'number' && !Number.isNaN(options.limit) ? options.limit : undefined,
|
|
539
|
+
});
|
|
540
|
+
}));
|
|
541
|
+
program
|
|
542
|
+
.command('path')
|
|
543
|
+
.description('Show local data paths.')
|
|
544
|
+
.action(showPaths);
|
|
545
|
+
program
|
|
546
|
+
.command('status')
|
|
547
|
+
.description('Show imported archive counts, last classification engine/model, and last probe result.')
|
|
548
|
+
.action(safe(async () => {
|
|
549
|
+
await showStatus();
|
|
550
|
+
}));
|
|
551
|
+
program
|
|
552
|
+
.command('viz')
|
|
553
|
+
.description('Show a terminal summary of archive coverage, labels, and top channels.')
|
|
554
|
+
.action(safe(async () => {
|
|
555
|
+
await showViz();
|
|
556
|
+
}));
|
|
557
|
+
program
|
|
558
|
+
.command('stats')
|
|
559
|
+
.description('Show the same summary dashboard as viz.')
|
|
560
|
+
.action(safe(async () => {
|
|
561
|
+
await showViz();
|
|
562
|
+
}));
|
|
563
|
+
program
|
|
564
|
+
.command('search')
|
|
565
|
+
.description('Search your imported archive using local full-text search.')
|
|
566
|
+
.argument('<query>', 'Search query')
|
|
567
|
+
.option('--channel <name>', 'Filter by channel title')
|
|
568
|
+
.option('--category <slug>', 'Filter by primary category')
|
|
569
|
+
.option('--domain <slug>', 'Filter by primary domain')
|
|
570
|
+
.option('--limit <n>', 'Limit results', (v) => Number(v), 20)
|
|
571
|
+
.option('--json', 'JSON output')
|
|
572
|
+
.action(safe(async (query, options) => {
|
|
573
|
+
await runSearchCommand(query, {
|
|
574
|
+
channel: options.channel,
|
|
575
|
+
category: options.category,
|
|
576
|
+
domain: options.domain,
|
|
577
|
+
limit: Number(options.limit) || 20,
|
|
578
|
+
json: Boolean(options.json),
|
|
579
|
+
});
|
|
580
|
+
}));
|
|
581
|
+
program
|
|
582
|
+
.command('list')
|
|
583
|
+
.description('List imported videos with filters.')
|
|
584
|
+
.option('--query <q>', 'Optional full-text query')
|
|
585
|
+
.option('--channel <name>', 'Filter by channel title')
|
|
586
|
+
.option('--category <slug>', 'Filter by primary category')
|
|
587
|
+
.option('--domain <slug>', 'Filter by primary domain')
|
|
588
|
+
.option('--privacy <value>', 'Filter by privacy status')
|
|
589
|
+
.option('--after <date>', 'Filter after YYYY-MM-DD')
|
|
590
|
+
.option('--before <date>', 'Filter before YYYY-MM-DD')
|
|
591
|
+
.option('--limit <n>', 'Limit results', (v) => Number(v), 30)
|
|
592
|
+
.option('--offset <n>', 'Offset results', (v) => Number(v), 0)
|
|
593
|
+
.option('--json', 'JSON output')
|
|
594
|
+
.action(safe(async (options) => {
|
|
595
|
+
await runListCommand({
|
|
596
|
+
query: options.query,
|
|
597
|
+
channel: options.channel,
|
|
598
|
+
category: options.category,
|
|
599
|
+
domain: options.domain,
|
|
600
|
+
privacy: options.privacy,
|
|
601
|
+
after: options.after,
|
|
602
|
+
before: options.before,
|
|
603
|
+
limit: Number(options.limit) || 30,
|
|
604
|
+
offset: Number(options.offset) || 0,
|
|
605
|
+
json: Boolean(options.json),
|
|
606
|
+
});
|
|
607
|
+
}));
|
|
608
|
+
program
|
|
609
|
+
.command('show')
|
|
610
|
+
.description('Show one imported video in detail.')
|
|
611
|
+
.argument('<id>', 'Stored id, YouTube video id, or full URL')
|
|
612
|
+
.option('--json', 'JSON output')
|
|
613
|
+
.action(safe(async (id, options) => {
|
|
614
|
+
await runShowCommand(id, { json: Boolean(options.json) });
|
|
615
|
+
}));
|
|
616
|
+
return program;
|
|
617
|
+
}
|
|
618
|
+
export async function run(argv = process.argv) {
|
|
619
|
+
loadEnv();
|
|
620
|
+
await buildCli().parseAsync(argv);
|
|
621
|
+
}
|
|
622
|
+
const isEntrypoint = process.argv[1] && import.meta.url === new URL(`file://${process.argv[1]}`).href;
|
|
623
|
+
if (isEntrypoint) {
|
|
624
|
+
run(process.argv);
|
|
625
|
+
}
|
package/dist/config.js
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import dotenv from 'dotenv';
|
|
4
|
+
import { dataDir, ensureDataDir } from './paths.js';
|
|
5
|
+
// Path of the env file most recently loaded by loadEnv(), or null before the
// first load; lets loadEnv() skip re-reading the same file.
let loadedEnvPath = null;
|
|
6
|
+
export function geminiEnvLocalPath() {
|
|
7
|
+
return path.join(dataDir(), '.env.local');
|
|
8
|
+
}
|
|
9
|
+
export function loadEnv() {
|
|
10
|
+
const envPath = geminiEnvLocalPath();
|
|
11
|
+
if (loadedEnvPath === envPath) {
|
|
12
|
+
return;
|
|
13
|
+
}
|
|
14
|
+
ensureDataDir();
|
|
15
|
+
dotenv.config({ path: envPath, override: false, quiet: true });
|
|
16
|
+
loadedEnvPath = envPath;
|
|
17
|
+
}
|
|
18
|
+
export function writeGeminiApiKeyToEnvLocal(apiKey) {
|
|
19
|
+
ensureDataDir();
|
|
20
|
+
const envPath = geminiEnvLocalPath();
|
|
21
|
+
const keyLine = `GEMINI_API_KEY=${apiKey}`;
|
|
22
|
+
let lines = [];
|
|
23
|
+
if (fs.existsSync(envPath)) {
|
|
24
|
+
lines = fs.readFileSync(envPath, 'utf8')
|
|
25
|
+
.split(/\r?\n/)
|
|
26
|
+
.filter((line) => line.length > 0);
|
|
27
|
+
}
|
|
28
|
+
let updated = false;
|
|
29
|
+
lines = lines.map((line) => {
|
|
30
|
+
if (/^\s*GEMINI_API_KEY=/.test(line)) {
|
|
31
|
+
updated = true;
|
|
32
|
+
return keyLine;
|
|
33
|
+
}
|
|
34
|
+
return line;
|
|
35
|
+
});
|
|
36
|
+
if (!updated) {
|
|
37
|
+
lines.push(keyLine);
|
|
38
|
+
}
|
|
39
|
+
fs.writeFileSync(envPath, `${lines.join('\n')}\n`, { mode: 0o600 });
|
|
40
|
+
return envPath;
|
|
41
|
+
}
|
package/dist/db.js
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { createRequire } from 'node:module';
|
|
2
|
+
import fs from 'node:fs';
|
|
3
|
+
import path from 'node:path';
|
|
4
|
+
// CommonJS-style require bound to this module's URL; getSql() uses it to load
// and resolve the sql.js-fts5 package.
const require = createRequire(import.meta.url);
|
|
5
|
+
// Cached result of initSqlJs(); stays undefined until getSql() first runs.
let sqlPromise;
|
|
6
|
+
function getSql() {
|
|
7
|
+
if (!sqlPromise) {
|
|
8
|
+
const initSqlJs = require('sql.js-fts5');
|
|
9
|
+
const wasmPath = require.resolve('sql.js-fts5/dist/sql-wasm.wasm');
|
|
10
|
+
const wasmBinary = fs.readFileSync(wasmPath);
|
|
11
|
+
sqlPromise = initSqlJs({ wasmBinary });
|
|
12
|
+
}
|
|
13
|
+
return sqlPromise;
|
|
14
|
+
}
|
|
15
|
+
export async function openDb(filePath) {
|
|
16
|
+
const SQL = await getSql();
|
|
17
|
+
if (fs.existsSync(filePath)) {
|
|
18
|
+
return new SQL.Database(fs.readFileSync(filePath));
|
|
19
|
+
}
|
|
20
|
+
return new SQL.Database();
|
|
21
|
+
}
|
|
22
|
+
export function saveDb(db, filePath) {
|
|
23
|
+
const dir = path.dirname(filePath);
|
|
24
|
+
if (!fs.existsSync(dir)) {
|
|
25
|
+
fs.mkdirSync(dir, { recursive: true });
|
|
26
|
+
}
|
|
27
|
+
fs.writeFileSync(filePath, Buffer.from(db.export()));
|
|
28
|
+
}
|