yt-liked 0.2.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,534 @@
1
+ import { openDb, saveDb } from './db.js';
2
+ import { readJsonLines, writeJsonLines } from './jsonl.js';
3
+ import { videosDbPath, videosJsonlPath } from './paths.js';
4
+ import { readProbeReport } from './report.js';
5
/**
 * Ensure the `videos` table and the `videos_fts` full-text index exist.
 *
 * Both statements use IF NOT EXISTS, so running this again against a database
 * that already has the schema changes nothing.
 *
 * @param {{ run: (sql: string) => unknown }} db - Database handle exposing `run`.
 */
function initSchema(db) {
    // One row per liked video; `categories` and `domains` are stored as TEXT.
    const createVideosTable = `
    CREATE TABLE IF NOT EXISTS videos (
      id TEXT PRIMARY KEY,
      video_id TEXT,
      playlist_item_id TEXT,
      url TEXT NOT NULL,
      title TEXT NOT NULL,
      description TEXT,
      channel_id TEXT,
      channel_title TEXT,
      liked_at TEXT,
      video_published_at TEXT,
      duration TEXT,
      privacy_status TEXT,
      position INTEGER,
      categories TEXT,
      primary_category TEXT,
      domains TEXT,
      primary_domain TEXT,
      classification_reason TEXT,
      classification_engine TEXT,
      classification_model TEXT,
      classified_at TEXT,
      imported_at TEXT NOT NULL
    );
  `;
    // External-content FTS5 index over the searchable text columns of `videos`.
    const createSearchIndex = `
    CREATE VIRTUAL TABLE IF NOT EXISTS videos_fts
    USING fts5(title, description, channel_title, content='videos', content_rowid='rowid');
  `;
    for (const statement of [createVideosTable, createSearchIndex]) {
        db.run(statement);
    }
}
37
/**
 * Double every single quote in a string so it can sit inside a
 * single-quoted SQL literal.
 *
 * @param {string} value - Text to escape.
 * @returns {string} The text with each `'` replaced by `''`.
 */
function escapeSql(value) {
    const pieces = value.split("'");
    return pieces.join("''");
}
40
/**
 * Insert one video record into `videos`, replacing any row with the same id.
 *
 * Missing (`undefined`) fields are bound as SQL NULL, and the prepared
 * statement is always freed, even when the insert itself throws.
 *
 * @param {{ prepare: (sql: string) => { run: (params: unknown[]) => unknown, free: () => unknown } }} db
 *   Database handle exposing `prepare`.
 * @param {object} record - Video record keyed by the `videos` column names;
 *   `categories` and `domains` are serialized with JSON.stringify when truthy.
 */
function insertRecord(db, record) {
    const stmt = db.prepare(`
    INSERT OR REPLACE INTO videos (
      id, video_id, playlist_item_id, url, title, description, channel_id, channel_title,
      liked_at, video_published_at, duration, privacy_status, position,
      categories, primary_category, domains, primary_domain,
      classification_reason, classification_engine, classification_model, classified_at, imported_at
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);
    // Records parsed from JSON may simply omit a key; bind those as NULL
    // rather than handing `undefined` to the statement binder.
    const orNull = (value) => value ?? null;
    const toJsonOrNull = (value) => (value ? JSON.stringify(value) : null);
    try {
        stmt.run([
            orNull(record.id),
            orNull(record.video_id),
            orNull(record.playlist_item_id),
            orNull(record.url),
            orNull(record.title),
            orNull(record.description),
            orNull(record.channel_id),
            orNull(record.channel_title),
            orNull(record.liked_at),
            orNull(record.video_published_at),
            orNull(record.duration),
            orNull(record.privacy_status),
            orNull(record.position),
            toJsonOrNull(record.categories),
            orNull(record.primary_category),
            toJsonOrNull(record.domains),
            orNull(record.primary_domain),
            orNull(record.classification_reason),
            orNull(record.classification_engine),
            orNull(record.classification_model),
            orNull(record.classified_at),
            orNull(record.imported_at),
        ]);
    }
    finally {
        // Release the statement whether or not the insert succeeded.
        stmt.free();
    }
}
75
/**
 * Convert a positional result row (22 columns, in the order used by this
 * module's SELECT statements) into a video record object.
 *
 * `position` is kept only when it is a number; `categories` and `domains`
 * are JSON-parsed when the cell is truthy and are null otherwise.
 *
 * @param {Array<unknown>} row - Column values in SELECT order.
 * @returns {object} Record keyed by column name.
 */
function rowToRecord(row) {
    const columnNames = [
        'id', 'video_id', 'playlist_item_id', 'url', 'title', 'description',
        'channel_id', 'channel_title', 'liked_at', 'video_published_at',
        'duration', 'privacy_status', 'position', 'categories',
        'primary_category', 'domains', 'primary_domain',
        'classification_reason', 'classification_engine',
        'classification_model', 'classified_at', 'imported_at',
    ];
    const parseJsonCell = (cell) => (cell ? JSON.parse(cell) : null);
    const record = {};
    columnNames.forEach((name, index) => {
        record[name] = row[index];
    });
    // Columns that need conversion are overwritten in place, which keeps the key order.
    record.position = typeof row[12] === 'number' ? row[12] : null;
    record.categories = parseJsonCell(row[13]);
    record.domains = parseJsonCell(row[15]);
    return record;
}
101
/**
 * Rebuild the `videos` table and its full-text index from the JSONL archive,
 * then persist the database.
 *
 * @param {{ force?: boolean }} [options] - With `force`, both tables are
 *   dropped and recreated before loading.
 * @returns {Promise<{ recordCount: number, newRecords: number }>} Both counts
 *   equal the number of archive records, because the table is fully reloaded.
 */
export async function buildIndex(options) {
    const db = await openDb(videosDbPath());
    try {
        // Read the archive inside the try block so that a read or parse
        // failure still closes the database handle opened above.
        const records = await readJsonLines(videosJsonlPath());
        if (options?.force) {
            db.run('DROP TABLE IF EXISTS videos_fts');
            db.run('DROP TABLE IF EXISTS videos');
        }
        initSchema(db);
        db.run('DELETE FROM videos');
        db.run('BEGIN TRANSACTION');
        for (const record of records) {
            insertRecord(db, record);
        }
        db.run('COMMIT');
        // Re-derive the FTS index from the freshly loaded content table.
        db.run(`INSERT INTO videos_fts(videos_fts) VALUES('rebuild')`);
        saveDb(db, videosDbPath());
        return { recordCount: records.length, newRecords: records.length };
    }
    finally {
        db.close();
    }
}
124
/**
 * Write every row of `videos` to the JSONL archive, ordered by position
 * (rows without a position last) and then by title.
 *
 * @param {object} [db] - An already open database handle. When omitted, the
 *   function opens its own handle, saves it after the export and closes it.
 * @returns {Promise<void>}
 */
export async function exportDbToJsonl(db) {
    const ownsDb = !db;
    let handle = db;
    if (handle === null || handle === undefined) {
        handle = await openDb(videosDbPath());
    }
    try {
        initSchema(handle);
        const result = handle.exec(`
      SELECT
        id, video_id, playlist_item_id, url, title, description, channel_id, channel_title,
        liked_at, video_published_at, duration, privacy_status, position,
        categories, primary_category, domains, primary_domain,
        classification_reason, classification_engine, classification_model, classified_at, imported_at
      FROM videos
      ORDER BY COALESCE(position, 999999999) ASC, title ASC
    `);
        const valueRows = result[0]?.values ?? [];
        const records = valueRows.map((row) => rowToRecord(row));
        writeJsonLines(videosJsonlPath(), records);
        if (ownsDb) {
            saveDb(handle, videosDbPath());
        }
    }
    finally {
        // A handle supplied by the caller stays open; the caller owns it.
        if (ownsDb) {
            handle.close();
        }
    }
}
149
/**
 * Collect summary figures for a status display: row counts, the engine and
 * model of the most recently classified row, and the last probe report.
 *
 * @returns {Promise<{
 *   importedCount: number,
 *   categorizedCount: number,
 *   domainCount: number,
 *   lastClassificationEngine: unknown,
 *   lastClassificationModel: unknown,
 *   lastProbe: unknown
 * }>}
 */
export async function getVideoStatusView() {
    const db = await openDb(videosDbPath());
    try {
        initSchema(db);
        const countResult = db.exec(`
      SELECT
        COUNT(*) AS imported_count,
        SUM(CASE WHEN primary_category IS NOT NULL THEN 1 ELSE 0 END) AS categorized_count,
        SUM(CASE WHEN primary_domain IS NOT NULL THEN 1 ELSE 0 END) AS domain_count
      FROM videos
    `);
        const latestRunResult = db.exec(`
      SELECT classification_engine, classification_model
      FROM videos
      WHERE classification_engine IS NOT NULL OR classification_model IS NOT NULL
      ORDER BY classified_at DESC
      LIMIT 1
    `);
        const counts = countResult[0]?.values?.[0] ?? [0, 0, 0];
        const latestRun = latestRunResult[0]?.values?.[0];
        const lastProbe = readProbeReport();
        return {
            // The SUM columns are NULL on an empty table, hence the `?? 0`.
            importedCount: Number(counts[0] ?? 0),
            categorizedCount: Number(counts[1] ?? 0),
            domainCount: Number(counts[2] ?? 0),
            lastClassificationEngine: latestRun?.[0] ?? null,
            lastClassificationModel: latestRun?.[1] ?? null,
            lastProbe,
        };
    }
    finally {
        db.close();
    }
}
183
/**
 * Turn `[label, count]` result rows into `{ label, count }` objects.
 * A null/undefined label becomes 'unknown' and a null/undefined count becomes 0.
 *
 * @param {Array<Array<unknown>>} rows - Two-column result rows.
 * @returns {Array<{ label: string, count: number }>}
 */
function rowsToLabelCounts(rows) {
    const toEntry = (row) => {
        const label = String(row[0] ?? 'unknown');
        const count = Number(row[1] ?? 0);
        return { label, count };
    };
    return rows.map((row) => toEntry(row));
}
189
/**
 * Turn `[channelTitle, count]` result rows into `{ channelTitle, count }`
 * objects. A null/undefined title becomes 'Unknown channel' and a
 * null/undefined count becomes 0.
 *
 * @param {Array<Array<unknown>>} rows - Two-column result rows.
 * @returns {Array<{ channelTitle: string, count: number }>}
 */
function rowsToChannelCounts(rows) {
    const toEntry = (row) => {
        const channelTitle = String(row[0] ?? 'Unknown channel');
        const count = Number(row[1] ?? 0);
        return { channelTitle, count };
    };
    return rows.map((row) => toEntry(row));
}
195
/**
 * Convert a positional search row (9 columns, in the order selected by
 * `searchVideos`) into a camelCase search result.
 *
 * @param {Array<unknown>} row - Column values in SELECT order.
 * @returns {object} Search result; `score` is coerced to a number, 0 when absent.
 */
function rowToSearchResult(row) {
    const passthroughKeys = [
        'id', 'url', 'title', 'description',
        'channelTitle', 'likedAt', 'primaryCategory', 'primaryDomain',
    ];
    const result = {};
    passthroughKeys.forEach((key, index) => {
        result[key] = row[index];
    });
    result.score = Number(row[8] ?? 0);
    return result;
}
208
/**
 * Build a parameterized WHERE clause for queries that alias `videos` as `v`.
 *
 * Each truthy filter contributes one condition and one bound parameter, in
 * the fixed order query, channel, after, before, category, domain, privacy.
 *
 * @param {object} filters - Optional `query`, `channel`, `after`, `before`,
 *   `category`, `domain` and `privacy` values.
 * @returns {{ where: string, params: Array<unknown> }} `where` is an empty
 *   string when no filter is set.
 */
function buildVideoWhereClause(filters) {
    const effectiveDate = 'COALESCE(v.liked_at, v.video_published_at, v.imported_at)';
    const rules = [
        ['query', 'v.rowid IN (SELECT rowid FROM videos_fts WHERE videos_fts MATCH ?)'],
        ['channel', 'v.channel_title = ? COLLATE NOCASE'],
        ['after', `${effectiveDate} >= ?`],
        ['before', `${effectiveDate} <= ?`],
        ['category', 'v.primary_category = ? COLLATE NOCASE'],
        ['domain', 'v.primary_domain = ? COLLATE NOCASE'],
        ['privacy', `COALESCE(v.privacy_status, 'unknown') = ? COLLATE NOCASE`],
    ];
    const conditions = [];
    const params = [];
    for (const [filterKey, condition] of rules) {
        const filterValue = filters[filterKey];
        if (!filterValue) {
            continue;
        }
        conditions.push(condition);
        params.push(filterValue);
    }
    const where = conditions.length === 0 ? '' : `WHERE ${conditions.join(' AND ')}`;
    return { where, params };
}
244
/**
 * Build the ORDER BY clause for video listings: effective date (liked,
 * else published, else imported) followed by title, both in one direction.
 *
 * @param {string} [direction='desc'] - 'asc' sorts ascending; any other
 *   value sorts descending.
 * @returns {string} SQL fragment for queries that alias `videos` as `v`.
 */
function videoSortClause(direction = 'desc') {
    let keyword = 'DESC';
    if (direction === 'asc') {
        keyword = 'ASC';
    }
    return `
    ORDER BY
      COALESCE(v.liked_at, v.video_published_at, v.imported_at) ${keyword},
      v.title ${keyword}
  `;
}
252
/**
 * Gather the aggregates behind the visualization view: totals, top
 * categories/domains/channels, likes per month, privacy breakdown, and a
 * heuristic flag for suspicious channel metadata.
 *
 * @returns {Promise<object>} Counts plus `{ label, count }` /
 *   `{ channelTitle, count }` lists, as assembled in the return statement.
 */
export async function getVideoVizView() {
    const db = await openDb(videosDbPath());
    try {
        initSchema(db);
        const totals = db.exec(`
      SELECT
        COUNT(*) AS imported_count,
        SUM(CASE WHEN primary_category IS NOT NULL THEN 1 ELSE 0 END) AS categorized_count,
        SUM(CASE WHEN primary_domain IS NOT NULL THEN 1 ELSE 0 END) AS domain_count
      FROM videos
    `);
        const categoryRows = db.exec(`
      SELECT primary_category, COUNT(*) AS n
      FROM videos
      WHERE primary_category IS NOT NULL
      GROUP BY primary_category
      ORDER BY n DESC, primary_category ASC
      LIMIT 12
    `);
        const domainRows = db.exec(`
      SELECT primary_domain, COUNT(*) AS n
      FROM videos
      WHERE primary_domain IS NOT NULL
      GROUP BY primary_domain
      ORDER BY n DESC, primary_domain ASC
      LIMIT 12
    `);
        // Group by the normalized expression, not the bare column name: a bare
        // `GROUP BY channel_title` binds to the table column, which keeps NULL
        // and '' apart and yields two separate "Unknown channel" rows.
        const channelRows = db.exec(`
      SELECT COALESCE(NULLIF(channel_title, ''), 'Unknown channel') AS channel_title, COUNT(*) AS n
      FROM videos
      GROUP BY COALESCE(NULLIF(channel_title, ''), 'Unknown channel')
      ORDER BY n DESC, channel_title ASC
      LIMIT 10
    `);
        // NOTE(review): ascending order with LIMIT 18 keeps the EARLIEST 18
        // months. Confirm whether the most recent 18 months were intended.
        const monthlyRows = db.exec(`
      SELECT substr(liked_at, 1, 7) AS ym, COUNT(*) AS n
      FROM videos
      WHERE liked_at IS NOT NULL AND liked_at != ''
      GROUP BY ym
      ORDER BY ym ASC
      LIMIT 18
    `);
        // Same normalization concern as the channel query above.
        const privacyRows = db.exec(`
      SELECT COALESCE(NULLIF(privacy_status, ''), 'unknown') AS privacy_status, COUNT(*) AS n
      FROM videos
      GROUP BY COALESCE(NULLIF(privacy_status, ''), 'unknown')
      ORDER BY n DESC, privacy_status ASC
      LIMIT 6
    `);
        const distinctChannelRows = db.exec(`
      SELECT
        COUNT(DISTINCT COALESCE(NULLIF(channel_title, ''), '__missing__')) AS distinct_titles,
        COUNT(DISTINCT COALESCE(NULLIF(channel_id, ''), '__missing__')) AS distinct_ids
      FROM videos
    `);
        // The SUM columns are NULL on an empty table, hence the `?? 0` fallbacks.
        const values = totals[0]?.values?.[0] ?? [0, 0, 0];
        const importedCount = Number(values[0] ?? 0);
        const categorizedCount = Number(values[1] ?? 0);
        const domainCount = Number(values[2] ?? 0);
        const distinctValues = distinctChannelRows[0]?.values?.[0] ?? [0, 0];
        const distinctChannelTitles = Number(distinctValues[0] ?? 0);
        const distinctChannelIds = Number(distinctValues[1] ?? 0);
        // Heuristic: more than 25 videos sharing at most one channel title or
        // id is flagged, as the variable name puts it, as a likely owner fallback.
        const channelMetadataLikelyOwnerFallback = importedCount > 25 && (distinctChannelTitles <= 1 || distinctChannelIds <= 1);
        const dominantFallbackChannelTitle = String(channelRows[0]?.values?.[0]?.[0] ?? '') || null;
        const dominantFallbackChannelCount = Number(channelRows[0]?.values?.[0]?.[1] ?? 0);
        return {
            importedCount,
            categorizedCount,
            domainCount,
            uncategorizedCount: Math.max(0, importedCount - categorizedCount),
            undomainedCount: Math.max(0, importedCount - domainCount),
            topCategories: rowsToLabelCounts(categoryRows[0]?.values ?? []),
            topDomains: rowsToLabelCounts(domainRows[0]?.values ?? []),
            topChannels: rowsToChannelCounts(channelRows[0]?.values ?? []),
            monthlyLikes: rowsToLabelCounts(monthlyRows[0]?.values ?? []),
            privacyBreakdown: rowsToLabelCounts(privacyRows[0]?.values ?? []),
            distinctChannelTitles,
            distinctChannelIds,
            channelMetadataLikelyOwnerFallback,
            dominantFallbackChannelTitle,
            dominantFallbackChannelCount,
        };
    }
    finally {
        db.close();
    }
}
339
/**
 * Full-text search over videos, ranked by bm25 with the effective date as
 * tie-breaker.
 *
 * The MATCH constraint is applied to the joined `videos_fts` table in the
 * outer query. FTS5 auxiliary functions such as `bm25()` are only meaningful
 * inside a full-text query on that table, so a MATCH hidden in a subquery
 * would leave the ranking without any query to score against.
 *
 * @param {object} filters - Same filters as `buildVideoWhereClause`, plus an
 *   optional `limit` (default 20).
 * @returns {Promise<Array<object>>} Results shaped by `rowToSearchResult`.
 */
export async function searchVideos(filters) {
    const db = await openDb(videosDbPath());
    try {
        initSchema(db);
        const limit = filters.limit ?? 20;
        // Build the non-text conditions with the shared helper, then attach
        // the text match directly to the joined FTS table.
        const { query, ...otherFilters } = filters;
        const { where, params } = buildVideoWhereClause(otherFilters);
        let whereSql = where;
        if (query) {
            whereSql = where
                ? `${where} AND videos_fts MATCH ?`
                : 'WHERE videos_fts MATCH ?';
            params.push(query);
        }
        const sql = `
      SELECT
        v.id,
        v.url,
        v.title,
        v.description,
        v.channel_title,
        v.liked_at,
        v.primary_category,
        v.primary_domain,
        bm25(videos_fts, 3.5, 1.5, 1.0) AS score
      FROM videos v
      JOIN videos_fts ON videos_fts.rowid = v.rowid
      ${whereSql}
      ORDER BY bm25(videos_fts, 3.5, 1.5, 1.0) ASC,
        COALESCE(v.liked_at, v.video_published_at, v.imported_at) DESC
      LIMIT ?
    `;
        params.push(limit);
        const rows = db.exec(sql, params);
        return (rows[0]?.values ?? []).map((row) => rowToSearchResult(row));
    }
    finally {
        db.close();
    }
}
371
/**
 * List video records matching the given filters, one page at a time.
 *
 * @param {object} [filters] - Filters understood by `buildVideoWhereClause`,
 *   plus optional `limit` (default 30), `offset` (default 0) and `sort`
 *   (passed to `videoSortClause`).
 * @returns {Promise<Array<object>>} Records shaped by `rowToRecord`.
 */
export async function listVideos(filters = {}) {
    const db = await openDb(videosDbPath());
    try {
        initSchema(db);
        const pageSize = filters.limit ?? 30;
        const pageStart = filters.offset ?? 0;
        const clause = buildVideoWhereClause(filters);
        const orderBy = videoSortClause(filters.sort);
        const sql = `
      SELECT
        v.id, v.video_id, v.playlist_item_id, v.url, v.title, v.description, v.channel_id, v.channel_title,
        v.liked_at, v.video_published_at, v.duration, v.privacy_status, v.position,
        v.categories, v.primary_category, v.domains, v.primary_domain,
        v.classification_reason, v.classification_engine, v.classification_model, v.classified_at, v.imported_at
      FROM videos v
      ${clause.where}
      ${orderBy}
      LIMIT ?
      OFFSET ?
    `;
        // Paging parameters are bound after the filter parameters.
        clause.params.push(pageSize, pageStart);
        const result = db.exec(sql, clause.params);
        const valueRows = result[0]?.values ?? [];
        return valueRows.map((row) => rowToRecord(row));
    }
    finally {
        db.close();
    }
}
398
/**
 * Fetch a single video record by its primary `id`.
 *
 * @param {string} id - Value of the `id` column.
 * @returns {Promise<object|null>} The record, or null when no row matches.
 */
export async function getVideoById(id) {
    const db = await openDb(videosDbPath());
    try {
        initSchema(db);
        const sql = `
      SELECT
        v.id, v.video_id, v.playlist_item_id, v.url, v.title, v.description, v.channel_id, v.channel_title,
        v.liked_at, v.video_published_at, v.duration, v.privacy_status, v.position,
        v.categories, v.primary_category, v.domains, v.primary_domain,
        v.classification_reason, v.classification_engine, v.classification_model, v.classified_at, v.imported_at
      FROM videos v
      WHERE v.id = ?
      LIMIT 1
    `;
        const result = db.exec(sql, [id]);
        const firstRow = result[0]?.values?.[0];
        if (!firstRow) {
            return null;
        }
        return rowToRecord(firstRow);
    }
    finally {
        db.close();
    }
}
419
/**
 * Fetch a single video record whose `id`, `video_id` or `url` equals the key.
 *
 * @param {string} key - Candidate id, video id or URL.
 * @returns {Promise<object|null>} One matching record, or null when nothing
 *   matches. The query has no ORDER BY, so with several matches the row
 *   returned is whichever the database yields first.
 */
export async function getVideoByLookupKey(key) {
    const db = await openDb(videosDbPath());
    try {
        initSchema(db);
        const sql = `
      SELECT
        v.id, v.video_id, v.playlist_item_id, v.url, v.title, v.description, v.channel_id, v.channel_title,
        v.liked_at, v.video_published_at, v.duration, v.privacy_status, v.position,
        v.categories, v.primary_category, v.domains, v.primary_domain,
        v.classification_reason, v.classification_engine, v.classification_model, v.classified_at, v.imported_at
      FROM videos v
      WHERE v.id = ?
        OR COALESCE(v.video_id, '') = ?
        OR v.url = ?
      LIMIT 1
    `;
        // The same key is bound to all three placeholders.
        const result = db.exec(sql, [key, key, key]);
        const firstRow = result[0]?.values?.[0];
        if (!firstRow) {
            return null;
        }
        return rowToRecord(firstRow);
    }
    finally {
        db.close();
    }
}
442
/**
 * Load videos to classify, in random order.
 *
 * @param {string} kind - 'categories' selects rows lacking a primary
 *   category; any other value selects rows lacking a primary domain.
 * @param {{ all?: boolean, limit?: number }} [options] - `all` selects every
 *   row regardless of existing classification; a truthy `limit` caps the
 *   result (minimum 1).
 * @returns {Promise<Array<object>>} Items with id, title, description,
 *   channelTitle, duration, privacyStatus and existingCategories.
 */
export async function loadClassificationItems(kind, options = {}) {
    const db = await openDb(videosDbPath());
    try {
        initSchema(db);
        let where = '1=1';
        if (!options.all) {
            where = kind === 'categories' ? 'primary_category IS NULL' : 'primary_domain IS NULL';
        }
        let limitClause = '';
        if (options.limit) {
            limitClause = ` LIMIT ${Math.max(1, options.limit)}`;
        }
        const result = db.exec(`
      SELECT id, title, description, channel_title, duration, privacy_status, categories
      FROM videos
      WHERE ${where}
      ORDER BY RANDOM()
      ${limitClause}
    `);
        const valueRows = result[0]?.values ?? [];
        return valueRows.map((row) => {
            const existingCategories = row[6] ? JSON.parse(row[6]) : null;
            return {
                id: row[0],
                title: row[1],
                description: row[2],
                channelTitle: row[3],
                duration: row[4],
                privacyStatus: row[5],
                existingCategories,
            };
        });
    }
    finally {
        db.close();
    }
}
471
/**
 * Apply category classification results to the database, persist it, and
 * re-export the JSONL archive.
 *
 * All updates in one call share a single `classified_at` timestamp.
 *
 * @param {Array<{ id: string, categories: unknown, primary: unknown,
 *   reason?: unknown, engine: unknown, model?: unknown }>} updates
 * @returns {Promise<void>}
 */
export async function applyCategoryUpdates(updates) {
    const db = await openDb(videosDbPath());
    try {
        initSchema(db);
        const stmt = db.prepare(`
      UPDATE videos
      SET categories = ?, primary_category = ?, classification_reason = ?, classification_engine = ?,
          classification_model = ?, classified_at = ?
      WHERE id = ?
    `);
        const classifiedAt = new Date().toISOString();
        try {
            for (const update of updates) {
                stmt.run([
                    JSON.stringify(update.categories),
                    update.primary,
                    // A missing reason is bound as NULL, not `undefined`.
                    update.reason ?? null,
                    update.engine,
                    update.model ?? null,
                    classifiedAt,
                    update.id,
                ]);
            }
        }
        finally {
            // Release the statement even when one of the updates throws.
            stmt.free();
        }
        saveDb(db, videosDbPath());
        await exportDbToJsonl(db);
    }
    finally {
        db.close();
    }
}
501
/**
 * Apply domain classification results to the database, persist it, and
 * re-export the JSONL archive.
 *
 * All updates in one call share a single `classified_at` timestamp. The
 * stored `classification_reason` is kept when an update carries no reason
 * (the SQL uses COALESCE for that column).
 *
 * @param {Array<{ id: string, domains: unknown, primary: unknown,
 *   reason?: unknown, engine: unknown, model?: unknown }>} updates
 * @returns {Promise<void>}
 */
export async function applyDomainUpdates(updates) {
    const db = await openDb(videosDbPath());
    try {
        initSchema(db);
        const stmt = db.prepare(`
      UPDATE videos
      SET domains = ?, primary_domain = ?, classification_reason = COALESCE(?, classification_reason),
          classification_engine = ?, classification_model = ?, classified_at = ?
      WHERE id = ?
    `);
        const classifiedAt = new Date().toISOString();
        try {
            for (const update of updates) {
                stmt.run([
                    JSON.stringify(update.domains),
                    update.primary,
                    // Bind NULL for a missing reason so COALESCE keeps the old value.
                    update.reason ?? null,
                    update.engine,
                    update.model ?? null,
                    classifiedAt,
                    update.id,
                ]);
            }
        }
        finally {
            // Release the statement even when one of the updates throws.
            stmt.free();
        }
        saveDb(db, videosDbPath());
        await exportDbToJsonl(db);
    }
    finally {
        db.close();
    }
}
531
/**
 * Report whether the JSONL archive contains at least one record.
 *
 * Despite the name, this does not throw when the archive is empty.
 *
 * @returns {Promise<boolean>} True when one or more records were read.
 */
export async function requireVideoData() {
    const archived = await readJsonLines(videosJsonlPath());
    const hasRecords = archived.length > 0;
    return hasRecords;
}
@@ -0,0 +1,122 @@
1
+ import fs from 'node:fs';
2
+ import { videosJsonlPath } from './paths.js';
3
+ import { readVideoArchive, writeJsonLines } from './jsonl.js';
4
+ import { buildIndex } from './videos-db.js';
5
/**
 * Return the trimmed string, or null when the value is not a string or is
 * empty after trimming.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string|null}
 */
function toStringOrNull(value) {
    const trimmed = typeof value === 'string' ? value.trim() : '';
    return trimmed === '' ? null : trimmed;
}
11
/**
 * Normalize a list of labels given either as an array or as a
 * comma-separated string.
 *
 * Items are trimmed, lower-cased and de-duplicated in first-seen order;
 * non-string array items and empty items are dropped.
 *
 * @param {unknown} value - Array, comma-separated string, or anything else.
 * @returns {string[]|null} The labels, or null when none remain or the input
 *   is neither an array nor a string.
 */
function toStringArray(value) {
    let rawItems;
    if (Array.isArray(value)) {
        rawItems = value.map((item) => (typeof item === 'string' ? item : ''));
    }
    else if (typeof value === 'string') {
        rawItems = value.split(',');
    }
    else {
        return null;
    }
    const unique = new Set();
    for (const rawItem of rawItems) {
        const label = rawItem.trim().toLowerCase();
        if (label) {
            unique.add(label);
        }
    }
    return unique.size > 0 ? Array.from(unique) : null;
}
27
/**
 * Determine the URL for an imported item: its explicit `url` when present,
 * otherwise a YouTube watch URL built from `video_id`.
 *
 * @param {object} input - Raw imported item.
 * @returns {string} The URL.
 * @throws {Error} When the item has neither a usable `url` nor `video_id`.
 */
function normalizeUrl(input) {
    const explicitUrl = toStringOrNull(input.url);
    if (!explicitUrl) {
        const videoId = toStringOrNull(input.video_id);
        if (!videoId) {
            throw new Error('Could not determine a video URL for one imported item.');
        }
        return `https://www.youtube.com/watch?v=${videoId}`;
    }
    return explicitUrl;
}
36
/**
 * Normalize one raw imported item into the archive's video record shape.
 *
 * The record id is the playlist item id, else the video id, else the URL.
 * Categories fall back to `tags`, then to the primary category alone;
 * domains fall back to the primary domain alone.
 *
 * `liked_at` is taken from `published_at`, falling back to an existing
 * `liked_at` field so that items already in the archive's own shape keep
 * their like date when imported again.
 *
 * @param {object} input - Raw item from the import file.
 * @param {string} [importedAt] - Timestamp stored as `imported_at`; defaults
 *   to the current time in ISO format.
 * @returns {object} Normalized video record.
 * @throws {TypeError} When `input` is not an object.
 * @throws {Error} When no URL can be determined (raised by `normalizeUrl`).
 */
export function normalizeImportedVideo(input, importedAt = new Date().toISOString()) {
    if (input === null || typeof input !== 'object') {
        throw new TypeError('Each imported item must be a JSON object.');
    }
    const playlistItemId = toStringOrNull(input.playlist_item_id);
    const videoId = toStringOrNull(input.video_id);
    const url = normalizeUrl(input);
    const id = playlistItemId ?? videoId ?? url;
    const primaryCategory = toStringOrNull(input.primary_category)?.toLowerCase() ?? null;
    let categories = toStringArray(input.categories) ?? toStringArray(input.tags);
    if (!categories && primaryCategory) {
        categories = [primaryCategory];
    }
    const primaryDomain = toStringOrNull(input.primary_domain)?.toLowerCase() ?? null;
    let domains = toStringArray(input.domains);
    if (!domains && primaryDomain) {
        domains = [primaryDomain];
    }
    return {
        id,
        video_id: videoId,
        playlist_item_id: playlistItemId,
        url,
        title: toStringOrNull(input.title) ?? '(untitled)',
        description: toStringOrNull(input.description),
        channel_id: toStringOrNull(input.channel_id),
        channel_title: toStringOrNull(input.channel_title),
        // `published_at` still wins when present; `liked_at` is only a fallback.
        liked_at: toStringOrNull(input.published_at) ?? toStringOrNull(input.liked_at),
        video_published_at: toStringOrNull(input.video_published_at),
        duration: toStringOrNull(input.duration),
        privacy_status: toStringOrNull(input.privacy_status),
        position: typeof input.position === 'number' ? input.position : null,
        categories,
        primary_category: primaryCategory,
        domains,
        primary_domain: primaryDomain,
        classification_reason: toStringOrNull(input.classification_reason),
        classification_engine: toStringOrNull(input.classification_engine),
        classification_model: toStringOrNull(input.classification_model),
        classified_at: toStringOrNull(input.classified_at),
        imported_at: importedAt,
    };
}
76
/**
 * Merge a freshly imported record into the stored record with the same id.
 *
 * Incoming values win when present; a null/undefined incoming value keeps
 * the stored one. This now also covers `video_id` and `playlist_item_id`,
 * so an import that lacks those identifiers no longer erases known ones.
 * The placeholder title '(untitled)' never replaces a stored title, and an
 * empty incoming URL keeps the stored URL.
 *
 * @param {object} existing - Record currently in the archive.
 * @param {object} incoming - Normalized record from the import.
 * @returns {object} A new merged record; neither input is mutated.
 */
export function mergeVideoRecords(existing, incoming) {
    return {
        ...existing,
        ...incoming,
        // The spreads above would let a null identifier overwrite a known one.
        video_id: incoming.video_id ?? existing.video_id,
        playlist_item_id: incoming.playlist_item_id ?? existing.playlist_item_id,
        url: incoming.url || existing.url,
        title: incoming.title !== '(untitled)' ? incoming.title : existing.title,
        description: incoming.description ?? existing.description,
        channel_id: incoming.channel_id ?? existing.channel_id,
        channel_title: incoming.channel_title ?? existing.channel_title,
        liked_at: incoming.liked_at ?? existing.liked_at,
        video_published_at: incoming.video_published_at ?? existing.video_published_at,
        duration: incoming.duration ?? existing.duration,
        privacy_status: incoming.privacy_status ?? existing.privacy_status,
        position: incoming.position ?? existing.position,
        categories: incoming.categories ?? existing.categories,
        primary_category: incoming.primary_category ?? existing.primary_category,
        domains: incoming.domains ?? existing.domains,
        primary_domain: incoming.primary_domain ?? existing.primary_domain,
        classification_reason: incoming.classification_reason ?? existing.classification_reason,
        classification_engine: incoming.classification_engine ?? existing.classification_engine,
        classification_model: incoming.classification_model ?? existing.classification_model,
        classified_at: incoming.classified_at ?? existing.classified_at,
        imported_at: incoming.imported_at ?? existing.imported_at,
    };
}
101
/**
 * Import a JSON array of raw video items: normalize them, merge them into
 * the existing archive by record id, rewrite the JSONL archive and rebuild
 * the index from scratch.
 *
 * @param {string} filePath - Path of the JSON file to import.
 * @returns {Promise<{ imported: number, total: number }>} Number of items in
 *   the import file and number of records in the archive afterwards.
 * @throws {Error} When the file does not contain a JSON array.
 */
export async function importVideoArchive(filePath) {
    const parsed = JSON.parse(fs.readFileSync(filePath, 'utf8'));
    if (!Array.isArray(parsed)) {
        throw new Error('Imported file must be a JSON array.');
    }
    // One timestamp for the whole batch.
    const importedAt = new Date().toISOString();
    const incoming = parsed.map((item) => normalizeImportedVideo(item, importedAt));
    const existing = await readVideoArchive();
    const byId = new Map();
    for (const stored of existing) {
        byId.set(stored.id, stored);
    }
    for (const fresh of incoming) {
        const prior = byId.get(fresh.id);
        const next = prior ? mergeVideoRecords(prior, fresh) : fresh;
        byId.set(fresh.id, next);
    }
    const records = [...byId.values()];
    writeJsonLines(videosJsonlPath(), records);
    await buildIndex({ force: true });
    return { imported: incoming.length, total: records.length };
}