yt-liked 0.2.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +203 -0
- package/bin/ytl.mjs +2 -0
- package/dist/channel-enrich.js +209 -0
- package/dist/chrome-cookies.js +130 -0
- package/dist/classify-setup.js +409 -0
- package/dist/cli.js +625 -0
- package/dist/config.js +41 -0
- package/dist/db.js +28 -0
- package/dist/gemini-classify.js +424 -0
- package/dist/jsonl.js +22 -0
- package/dist/paths.js +26 -0
- package/dist/report.js +24 -0
- package/dist/types.js +1 -0
- package/dist/videos-db.js +534 -0
- package/dist/videos-import.js +122 -0
- package/dist/videos-viz.js +140 -0
- package/dist/youtube-web.js +217 -0
- package/package.json +47 -0
|
@@ -0,0 +1,534 @@
|
|
|
1
|
+
import { openDb, saveDb } from './db.js';
|
|
2
|
+
import { readJsonLines, writeJsonLines } from './jsonl.js';
|
|
3
|
+
import { videosDbPath, videosJsonlPath } from './paths.js';
|
|
4
|
+
import { readProbeReport } from './report.js';
|
|
5
|
+
/**
 * Make sure the `videos` table and the `videos_fts` full-text table exist.
 *
 * Both statements are guarded with IF NOT EXISTS, so running this against a
 * database that already has the tables leaves them untouched.
 *
 * @param {{ run(sql: string): unknown }} db - Database handle exposing `run`.
 */
function initSchema(db) {
    const createVideosTable = `
        CREATE TABLE IF NOT EXISTS videos (
            id TEXT PRIMARY KEY,
            video_id TEXT,
            playlist_item_id TEXT,
            url TEXT NOT NULL,
            title TEXT NOT NULL,
            description TEXT,
            channel_id TEXT,
            channel_title TEXT,
            liked_at TEXT,
            video_published_at TEXT,
            duration TEXT,
            privacy_status TEXT,
            position INTEGER,
            categories TEXT,
            primary_category TEXT,
            domains TEXT,
            primary_domain TEXT,
            classification_reason TEXT,
            classification_engine TEXT,
            classification_model TEXT,
            classified_at TEXT,
            imported_at TEXT NOT NULL
        );
    `;
    // External-content FTS table: the indexed text lives in `videos`.
    const createSearchTable = `
        CREATE VIRTUAL TABLE IF NOT EXISTS videos_fts
        USING fts5(title, description, channel_title, content='videos', content_rowid='rowid');
    `;
    for (const ddl of [createVideosTable, createSearchTable]) {
        db.run(ddl);
    }
}
|
|
37
|
+
/**
 * Double every single quote in `value` so it can sit inside a single-quoted
 * SQL string literal.
 *
 * @param {string} value - Text to escape.
 * @returns {string} The text with each `'` replaced by `''`.
 */
function escapeSql(value) {
    const quote = "'";
    return value.replaceAll(quote, quote + quote);
}
|
|
40
|
+
/**
 * Insert one video record into `videos`, replacing any row with the same id.
 *
 * `categories` and `domains` are stored as JSON text (or NULL when falsy).
 * Fields missing from `record` are bound as NULL rather than `undefined`, so
 * rows written by an older schema (without the newer keys) still import.
 * The prepared statement is always released, even when the insert throws.
 *
 * @param {{ prepare(sql: string): { run(values: unknown[]): unknown, free(): unknown } }} db
 *   Database handle.
 * @param {object} record - Video record keyed by column name.
 */
function insertRecord(db, record) {
    const toJsonOrNull = (value) => (value ? JSON.stringify(value) : null);
    const stmt = db.prepare(`
        INSERT OR REPLACE INTO videos (
            id, video_id, playlist_item_id, url, title, description, channel_id, channel_title,
            liked_at, video_published_at, duration, privacy_status, position,
            categories, primary_category, domains, primary_domain,
            classification_reason, classification_engine, classification_model, classified_at, imported_at
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `);
    try {
        const values = [
            record.id,
            record.video_id,
            record.playlist_item_id,
            record.url,
            record.title,
            record.description,
            record.channel_id,
            record.channel_title,
            record.liked_at,
            record.video_published_at,
            record.duration,
            record.privacy_status,
            record.position,
            toJsonOrNull(record.categories),
            record.primary_category,
            toJsonOrNull(record.domains),
            record.primary_domain,
            record.classification_reason,
            record.classification_engine,
            record.classification_model,
            record.classified_at,
            record.imported_at,
        ].map((value) => value ?? null); // never hand `undefined` to the binder
        stmt.run(values);
    } finally {
        stmt.free();
    }
}
|
|
75
|
+
/**
 * Convert a positional result row (in the 22-column SELECT order used by this
 * module) into a video record object.
 *
 * `position` is kept only when it is a number; `categories` and `domains`
 * are parsed from their JSON text, or become null when the cell is falsy.
 *
 * @param {unknown[]} row - Column values in SELECT order.
 * @returns {object} Record keyed by column name.
 */
function rowToRecord(row) {
    const columns = [
        'id', 'video_id', 'playlist_item_id', 'url', 'title', 'description',
        'channel_id', 'channel_title', 'liked_at', 'video_published_at',
        'duration', 'privacy_status', 'position', 'categories',
        'primary_category', 'domains', 'primary_domain',
        'classification_reason', 'classification_engine',
        'classification_model', 'classified_at', 'imported_at',
    ];
    const parseJsonCell = (cell) => (cell ? JSON.parse(cell) : null);
    const record = {};
    columns.forEach((column, index) => {
        record[column] = row[index];
    });
    // Re-assigning existing keys keeps the column order intact.
    record.position = typeof row[12] === 'number' ? row[12] : null;
    record.categories = parseJsonCell(row[13]);
    record.domains = parseJsonCell(row[15]);
    return record;
}
|
|
101
|
+
/**
 * Rebuild the videos database from the JSONL archive.
 *
 * All existing rows are deleted and every archived record is re-inserted in
 * one transaction; the FTS table is then rebuilt and the database saved.
 * With `options.force` the tables are dropped and recreated first.
 *
 * @param {{ force?: boolean }} [options]
 * @returns {Promise<{ recordCount: number, newRecords: number }>} Both counts
 *   are the number of records read from the archive.
 */
export async function buildIndex(options) {
    const db = await openDb(videosDbPath());
    try {
        // Read inside the try block so a failed read still closes the handle.
        const records = await readJsonLines(videosJsonlPath());
        if (options?.force) {
            db.run('DROP TABLE IF EXISTS videos_fts');
            db.run('DROP TABLE IF EXISTS videos');
        }
        initSchema(db);
        db.run('DELETE FROM videos');
        db.run('BEGIN TRANSACTION');
        for (const record of records) {
            insertRecord(db, record);
        }
        db.run('COMMIT');
        // Re-derive the external-content FTS index from the fresh rows.
        db.run(`INSERT INTO videos_fts(videos_fts) VALUES('rebuild')`);
        saveDb(db, videosDbPath());
        return { recordCount: records.length, newRecords: records.length };
    } finally {
        db.close();
    }
}
|
|
124
|
+
/**
 * Write every row of `videos` to the JSONL archive, ordered by playlist
 * position (rows without a position last) and then by title.
 *
 * When no handle is passed, one is opened, saved after the export and closed
 * here; a caller-supplied handle is neither saved nor closed.
 *
 * @param {object} [db] - Optional already-open database handle.
 * @returns {Promise<void>}
 */
export async function exportDbToJsonl(db) {
    const localDb = db ?? await openDb(videosDbPath());
    const ownsDb = !db;
    const exportSql = `
        SELECT
            id, video_id, playlist_item_id, url, title, description, channel_id, channel_title,
            liked_at, video_published_at, duration, privacy_status, position,
            categories, primary_category, domains, primary_domain,
            classification_reason, classification_engine, classification_model, classified_at, imported_at
        FROM videos
        ORDER BY COALESCE(position, 999999999) ASC, title ASC
    `;
    try {
        initSchema(localDb);
        const resultSets = localDb.exec(exportSql);
        const valueRows = resultSets[0]?.values ?? [];
        const records = valueRows.map((valueRow) => rowToRecord(valueRow));
        writeJsonLines(videosJsonlPath(), records);
        if (ownsDb) {
            saveDb(localDb, videosDbPath());
        }
    } finally {
        if (ownsDb) {
            localDb.close();
        }
    }
}
|
|
149
|
+
/**
 * Summarise the state of the videos database: total rows, how many have a
 * primary category / primary domain, the engine and model of the most
 * recently classified row, and the last probe report.
 *
 * @returns {Promise<object>} Status summary.
 */
export async function getVideoStatusView() {
    const db = await openDb(videosDbPath());
    try {
        initSchema(db);
        const countResult = db.exec(`
            SELECT
                COUNT(*) AS imported_count,
                SUM(CASE WHEN primary_category IS NOT NULL THEN 1 ELSE 0 END) AS categorized_count,
                SUM(CASE WHEN primary_domain IS NOT NULL THEN 1 ELSE 0 END) AS domain_count
            FROM videos
        `);
        const latestRunResult = db.exec(`
            SELECT classification_engine, classification_model
            FROM videos
            WHERE classification_engine IS NOT NULL OR classification_model IS NOT NULL
            ORDER BY classified_at DESC
            LIMIT 1
        `);
        const counts = countResult[0]?.values?.[0] ?? [0, 0, 0];
        const latestRun = latestRunResult[0]?.values?.[0];
        const lastProbe = readProbeReport();
        return {
            importedCount: Number(counts[0] ?? 0),
            categorizedCount: Number(counts[1] ?? 0),
            domainCount: Number(counts[2] ?? 0),
            lastClassificationEngine: latestRun?.[0] ?? null,
            lastClassificationModel: latestRun?.[1] ?? null,
            lastProbe,
        };
    } finally {
        db.close();
    }
}
|
|
183
|
+
/**
 * Convert `[label, count]` result rows into `{ label, count }` objects.
 * A missing label becomes 'unknown' and a missing count becomes 0.
 *
 * @param {unknown[][]} rows
 * @returns {{ label: string, count: number }[]}
 */
function rowsToLabelCounts(rows) {
    return rows.map((row) => {
        const label = String(row[0] ?? 'unknown');
        const count = Number(row[1] ?? 0);
        return { label, count };
    });
}
|
|
189
|
+
/**
 * Convert `[channelTitle, count]` result rows into `{ channelTitle, count }`
 * objects. A missing title becomes 'Unknown channel' and a missing count 0.
 *
 * @param {unknown[][]} rows
 * @returns {{ channelTitle: string, count: number }[]}
 */
function rowsToChannelCounts(rows) {
    return rows.map((row) => {
        const channelTitle = String(row[0] ?? 'Unknown channel');
        const count = Number(row[1] ?? 0);
        return { channelTitle, count };
    });
}
|
|
195
|
+
/**
 * Convert a positional search row (id, url, title, description,
 * channel_title, liked_at, primary_category, primary_domain, score) into a
 * camelCased search result.
 *
 * @param {unknown[]} row
 * @returns {object} Search result; `score` is always a number (0 when absent).
 */
function rowToSearchResult(row) {
    const rawScore = row[8];
    const result = {
        id: row[0],
        url: row[1],
        title: row[2],
        description: row[3],
        channelTitle: row[4],
        likedAt: row[5],
        primaryCategory: row[6],
        primaryDomain: row[7],
    };
    result.score = Number(rawScore ?? 0);
    return result;
}
|
|
208
|
+
/**
 * Build a parameterised WHERE clause (against the alias `v`) from the
 * truthy entries of `filters`.
 *
 * Supported keys, in clause order: query, channel, after, before, category,
 * domain, privacy. Each active filter contributes one condition and one
 * bound parameter; conditions are joined with AND.
 *
 * @param {object} filters
 * @returns {{ where: string, params: unknown[] }} `where` is '' when no
 *   filter is active.
 */
function buildVideoWhereClause(filters) {
    const timestamp = 'COALESCE(v.liked_at, v.video_published_at, v.imported_at)';
    const clauseByFilter = [
        ['query', 'v.rowid IN (SELECT rowid FROM videos_fts WHERE videos_fts MATCH ?)'],
        ['channel', 'v.channel_title = ? COLLATE NOCASE'],
        ['after', `${timestamp} >= ?`],
        ['before', `${timestamp} <= ?`],
        ['category', 'v.primary_category = ? COLLATE NOCASE'],
        ['domain', 'v.primary_domain = ? COLLATE NOCASE'],
        ['privacy', "COALESCE(v.privacy_status, 'unknown') = ? COLLATE NOCASE"],
    ];
    const active = clauseByFilter.filter(([key]) => filters[key]);
    const conditions = active.map(([, clause]) => clause);
    const params = active.map(([key]) => filters[key]);
    const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
    return { where, params };
}
|
|
244
|
+
/**
 * Build the ORDER BY clause for video listings: first by the best available
 * timestamp (liked, then published, then imported), then by title.
 *
 * @param {string} [direction='desc'] - 'asc' sorts ascending; any other
 *   value sorts descending.
 * @returns {string} SQL fragment starting with ORDER BY.
 */
function videoSortClause(direction = 'desc') {
    let normalized = 'DESC';
    if (direction === 'asc') {
        normalized = 'ASC';
    }
    return `
        ORDER BY
            COALESCE(v.liked_at, v.video_published_at, v.imported_at) ${normalized},
            v.title ${normalized}
    `;
}
|
|
252
|
+
/**
 * Collect the aggregate figures used for the video visualisation: totals,
 * top categories / domains / channels, likes per month, privacy breakdown,
 * and a heuristic flag for channel metadata that looks like a single
 * fallback owner.
 *
 * @returns {Promise<object>} Aggregated view data.
 */
export async function getVideoVizView() {
    const db = await openDb(videosDbPath());
    try {
        initSchema(db);
        const totals = db.exec(`
            SELECT
                COUNT(*) AS imported_count,
                SUM(CASE WHEN primary_category IS NOT NULL THEN 1 ELSE 0 END) AS categorized_count,
                SUM(CASE WHEN primary_domain IS NOT NULL THEN 1 ELSE 0 END) AS domain_count
            FROM videos
        `);
        const categoryRows = db.exec(`
            SELECT primary_category, COUNT(*) AS n
            FROM videos
            WHERE primary_category IS NOT NULL
            GROUP BY primary_category
            ORDER BY n DESC, primary_category ASC
            LIMIT 12
        `);
        const domainRows = db.exec(`
            SELECT primary_domain, COUNT(*) AS n
            FROM videos
            WHERE primary_domain IS NOT NULL
            GROUP BY primary_domain
            ORDER BY n DESC, primary_domain ASC
            LIMIT 12
        `);
        // GROUP BY repeats the COALESCE expression on purpose: a bare
        // `channel_title` there resolves to the table column, not the
        // same-named result alias, which would keep NULL and '' in separate
        // groups that both display as 'Unknown channel'.
        const channelRows = db.exec(`
            SELECT COALESCE(NULLIF(channel_title, ''), 'Unknown channel') AS channel_title, COUNT(*) AS n
            FROM videos
            GROUP BY COALESCE(NULLIF(channel_title, ''), 'Unknown channel')
            ORDER BY n DESC, channel_title ASC
            LIMIT 10
        `);
        // NOTE(review): ascending order + LIMIT 18 keeps the 18 *earliest*
        // months; confirm that is intended rather than the most recent 18.
        const monthlyRows = db.exec(`
            SELECT substr(liked_at, 1, 7) AS ym, COUNT(*) AS n
            FROM videos
            WHERE liked_at IS NOT NULL AND liked_at != ''
            GROUP BY ym
            ORDER BY ym ASC
            LIMIT 18
        `);
        // Same alias-vs-column concern as the channel query above.
        const privacyRows = db.exec(`
            SELECT COALESCE(NULLIF(privacy_status, ''), 'unknown') AS privacy_status, COUNT(*) AS n
            FROM videos
            GROUP BY COALESCE(NULLIF(privacy_status, ''), 'unknown')
            ORDER BY n DESC, privacy_status ASC
            LIMIT 6
        `);
        const distinctChannelRows = db.exec(`
            SELECT
                COUNT(DISTINCT COALESCE(NULLIF(channel_title, ''), '__missing__')) AS distinct_titles,
                COUNT(DISTINCT COALESCE(NULLIF(channel_id, ''), '__missing__')) AS distinct_ids
            FROM videos
        `);
        const values = totals[0]?.values?.[0] ?? [0, 0, 0];
        const importedCount = Number(values[0] ?? 0);
        const categorizedCount = Number(values[1] ?? 0);
        const domainCount = Number(values[2] ?? 0);
        const distinctValues = distinctChannelRows[0]?.values?.[0] ?? [0, 0];
        const distinctChannelTitles = Number(distinctValues[0] ?? 0);
        const distinctChannelIds = Number(distinctValues[1] ?? 0);
        // More than 25 videos sharing at most one distinct channel title or
        // id is treated as a sign the channel fields hold a fallback value.
        const channelMetadataLikelyOwnerFallback = importedCount > 25 && (distinctChannelTitles <= 1 || distinctChannelIds <= 1);
        const dominantFallbackChannelTitle = String(channelRows[0]?.values?.[0]?.[0] ?? '') || null;
        const dominantFallbackChannelCount = Number(channelRows[0]?.values?.[0]?.[1] ?? 0);
        return {
            importedCount,
            categorizedCount,
            domainCount,
            uncategorizedCount: Math.max(0, importedCount - categorizedCount),
            undomainedCount: Math.max(0, importedCount - domainCount),
            topCategories: rowsToLabelCounts(categoryRows[0]?.values ?? []),
            topDomains: rowsToLabelCounts(domainRows[0]?.values ?? []),
            topChannels: rowsToChannelCounts(channelRows[0]?.values ?? []),
            monthlyLikes: rowsToLabelCounts(monthlyRows[0]?.values ?? []),
            privacyBreakdown: rowsToLabelCounts(privacyRows[0]?.values ?? []),
            distinctChannelTitles,
            distinctChannelIds,
            channelMetadataLikelyOwnerFallback,
            dominantFallbackChannelTitle,
            dominantFallbackChannelCount,
        };
    } finally {
        db.close();
    }
}
|
|
339
|
+
/**
 * Run a ranked full-text search over the video index.
 *
 * Results are ordered by bm25 relevance (title weighted 3.5, description
 * 1.5, channel title 1.0) and then by most recent timestamp.
 *
 * @param {object} filters - Same filter keys as `buildVideoWhereClause`,
 *   plus an optional `limit` (default 20).
 * @returns {Promise<object[]>} Search results with a numeric `score`.
 */
export async function searchVideos(filters) {
    const db = await openDb(videosDbPath());
    try {
        initSchema(db);
        const limit = filters.limit ?? 20;
        const { where, params } = buildVideoWhereClause(filters);
        // bm25() scores the current row against the full-text query running
        // on the videos_fts cursor it is called on. The shared WHERE builder
        // only matches inside a subquery, so the MATCH is repeated on the
        // joined videos_fts here to give bm25() a query to rank against.
        let rankedWhere = where;
        if (filters.query) {
            rankedWhere = where
                ? `${where} AND videos_fts MATCH ?`
                : 'WHERE videos_fts MATCH ?';
            params.push(filters.query);
        }
        const sql = `
            SELECT
                v.id,
                v.url,
                v.title,
                v.description,
                v.channel_title,
                v.liked_at,
                v.primary_category,
                v.primary_domain,
                bm25(videos_fts, 3.5, 1.5, 1.0) AS score
            FROM videos v
            JOIN videos_fts ON videos_fts.rowid = v.rowid
            ${rankedWhere}
            ORDER BY bm25(videos_fts, 3.5, 1.5, 1.0) ASC,
                COALESCE(v.liked_at, v.video_published_at, v.imported_at) DESC
            LIMIT ?
        `;
        params.push(limit);
        const rows = db.exec(sql, params);
        return (rows[0]?.values ?? []).map((row) => rowToSearchResult(row));
    } finally {
        db.close();
    }
}
|
|
371
|
+
/**
 * List video records matching `filters`, sorted by timestamp then title.
 *
 * @param {object} [filters] - Filter keys understood by
 *   `buildVideoWhereClause`, plus `sort` ('asc' or descending otherwise),
 *   `limit` (default 30) and `offset` (default 0).
 * @returns {Promise<object[]>} Matching records.
 */
export async function listVideos(filters = {}) {
    const db = await openDb(videosDbPath());
    try {
        initSchema(db);
        const pageSize = filters.limit ?? 30;
        const skip = filters.offset ?? 0;
        const { where, params } = buildVideoWhereClause(filters);
        const orderBy = videoSortClause(filters.sort);
        const listSql = `
            SELECT
                v.id, v.video_id, v.playlist_item_id, v.url, v.title, v.description, v.channel_id, v.channel_title,
                v.liked_at, v.video_published_at, v.duration, v.privacy_status, v.position,
                v.categories, v.primary_category, v.domains, v.primary_domain,
                v.classification_reason, v.classification_engine, v.classification_model, v.classified_at, v.imported_at
            FROM videos v
            ${where}
            ${orderBy}
            LIMIT ?
            OFFSET ?
        `;
        params.push(pageSize, skip);
        const resultSets = db.exec(listSql, params);
        const valueRows = resultSets[0]?.values ?? [];
        return valueRows.map((valueRow) => rowToRecord(valueRow));
    } finally {
        db.close();
    }
}
|
|
398
|
+
/**
 * Fetch the single video whose primary key equals `id`.
 *
 * @param {string} id - Value of the `id` column.
 * @returns {Promise<object|null>} The record, or null when no row matches.
 */
export async function getVideoById(id) {
    const db = await openDb(videosDbPath());
    try {
        initSchema(db);
        const lookupSql = `
            SELECT
                v.id, v.video_id, v.playlist_item_id, v.url, v.title, v.description, v.channel_id, v.channel_title,
                v.liked_at, v.video_published_at, v.duration, v.privacy_status, v.position,
                v.categories, v.primary_category, v.domains, v.primary_domain,
                v.classification_reason, v.classification_engine, v.classification_model, v.classified_at, v.imported_at
            FROM videos v
            WHERE v.id = ?
            LIMIT 1
        `;
        const resultSets = db.exec(lookupSql, [id]);
        const match = resultSets[0]?.values?.[0];
        if (!match) {
            return null;
        }
        return rowToRecord(match);
    } finally {
        db.close();
    }
}
|
|
419
|
+
/**
 * Fetch one video whose `id`, `video_id` or `url` equals `key`.
 *
 * The query has no ORDER BY, so when several rows match it does not specify
 * which one is returned.
 *
 * @param {string} key - Record id, YouTube video id, or URL.
 * @returns {Promise<object|null>} The record, or null when no row matches.
 */
export async function getVideoByLookupKey(key) {
    const db = await openDb(videosDbPath());
    try {
        initSchema(db);
        const lookupSql = `
            SELECT
                v.id, v.video_id, v.playlist_item_id, v.url, v.title, v.description, v.channel_id, v.channel_title,
                v.liked_at, v.video_published_at, v.duration, v.privacy_status, v.position,
                v.categories, v.primary_category, v.domains, v.primary_domain,
                v.classification_reason, v.classification_engine, v.classification_model, v.classified_at, v.imported_at
            FROM videos v
            WHERE v.id = ?
                OR COALESCE(v.video_id, '') = ?
                OR v.url = ?
            LIMIT 1
        `;
        // The same key is bound to all three placeholders.
        const bindings = [key, key, key];
        const resultSets = db.exec(lookupSql, bindings);
        const match = resultSets[0]?.values?.[0];
        if (!match) {
            return null;
        }
        return rowToRecord(match);
    } finally {
        db.close();
    }
}
|
|
442
|
+
/**
 * Load videos to classify, in random order.
 *
 * @param {string} kind - 'categories' selects rows lacking a primary
 *   category; any other value selects rows lacking a primary domain.
 * @param {{ all?: boolean, limit?: number }} [options] - `all` selects every
 *   row regardless of existing labels; a truthy `limit` caps the row count
 *   (minimum 1).
 * @returns {Promise<object[]>} Items with id, title, description,
 *   channelTitle, duration, privacyStatus and existingCategories.
 */
export async function loadClassificationItems(kind, options = {}) {
    const db = await openDb(videosDbPath());
    try {
        initSchema(db);
        const pendingColumn = kind === 'categories' ? 'primary_category' : 'primary_domain';
        const where = options.all ? '1=1' : `${pendingColumn} IS NULL`;
        let limitClause = '';
        if (options.limit) {
            limitClause = ` LIMIT ${Math.max(1, options.limit)}`;
        }
        const resultSets = db.exec(`
            SELECT id, title, description, channel_title, duration, privacy_status, categories
            FROM videos
            WHERE ${where}
            ORDER BY RANDOM()
            ${limitClause}
        `);
        const valueRows = resultSets[0]?.values ?? [];
        return valueRows.map((valueRow) => {
            const storedCategories = valueRow[6];
            return {
                id: valueRow[0],
                title: valueRow[1],
                description: valueRow[2],
                channelTitle: valueRow[3],
                duration: valueRow[4],
                privacyStatus: valueRow[5],
                existingCategories: storedCategories ? JSON.parse(storedCategories) : null,
            };
        });
    } finally {
        db.close();
    }
}
|
|
471
|
+
/**
 * Persist category classifications.
 *
 * Every update overwrites the row's categories, primary category, reason,
 * engine and model, and stamps it with one shared `classified_at` time.
 * Afterwards the database is saved and re-exported to the JSONL archive.
 *
 * @param {Iterable<{ id: string, categories: unknown, primary: unknown,
 *   reason: unknown, engine: unknown, model?: unknown }>} updates
 * @returns {Promise<void>}
 */
export async function applyCategoryUpdates(updates) {
    const db = await openDb(videosDbPath());
    try {
        initSchema(db);
        const updateStmt = db.prepare(`
            UPDATE videos
            SET categories = ?, primary_category = ?, classification_reason = ?, classification_engine = ?,
                classification_model = ?, classified_at = ?
            WHERE id = ?
        `);
        const stamp = new Date().toISOString();
        for (const entry of updates) {
            const bindings = [
                JSON.stringify(entry.categories),
                entry.primary,
                entry.reason,
                entry.engine,
                entry.model ?? null,
                stamp,
                entry.id,
            ];
            updateStmt.run(bindings);
        }
        updateStmt.free();
        saveDb(db, videosDbPath());
        await exportDbToJsonl(db);
    } finally {
        db.close();
    }
}
|
|
501
|
+
/**
 * Persist domain classifications.
 *
 * Every update overwrites the row's domains, primary domain, engine and
 * model, and stamps it with one shared `classified_at` time. The stored
 * classification reason is replaced only when the update supplies a non-NULL
 * one. Afterwards the database is saved and re-exported to the JSONL archive.
 *
 * @param {Iterable<{ id: string, domains: unknown, primary: unknown,
 *   reason: unknown, engine: unknown, model?: unknown }>} updates
 * @returns {Promise<void>}
 */
export async function applyDomainUpdates(updates) {
    const db = await openDb(videosDbPath());
    try {
        initSchema(db);
        const updateStmt = db.prepare(`
            UPDATE videos
            SET domains = ?, primary_domain = ?, classification_reason = COALESCE(?, classification_reason),
                classification_engine = ?, classification_model = ?, classified_at = ?
            WHERE id = ?
        `);
        const stamp = new Date().toISOString();
        for (const entry of updates) {
            const bindings = [
                JSON.stringify(entry.domains),
                entry.primary,
                entry.reason,
                entry.engine,
                entry.model ?? null,
                stamp,
                entry.id,
            ];
            updateStmt.run(bindings);
        }
        updateStmt.free();
        saveDb(db, videosDbPath());
        await exportDbToJsonl(db);
    } finally {
        db.close();
    }
}
|
|
531
|
+
/**
 * Report whether the JSONL archive holds at least one record.
 *
 * @returns {Promise<boolean>} True when the archive is non-empty.
 */
export async function requireVideoData() {
    const archived = await readJsonLines(videosJsonlPath());
    const hasRecords = archived.length > 0;
    return hasRecords;
}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import { videosJsonlPath } from './paths.js';
|
|
3
|
+
import { readVideoArchive, writeJsonLines } from './jsonl.js';
|
|
4
|
+
import { buildIndex } from './videos-db.js';
|
|
5
|
+
/**
 * Return `value` trimmed when it is a string with visible content, otherwise
 * null.
 *
 * @param {unknown} value
 * @returns {string|null}
 */
function toStringOrNull(value) {
    const text = typeof value === 'string' ? value.trim() : '';
    return text === '' ? null : text;
}
|
|
11
|
+
/**
 * Normalise a list of labels into a de-duplicated array of trimmed,
 * lower-cased, non-empty strings.
 *
 * Accepts either an array (non-string items are ignored) or a
 * comma-separated string. Returns null for any other input, or when nothing
 * usable remains.
 *
 * @param {unknown} value
 * @returns {string[]|null}
 */
function toStringArray(value) {
    let candidates;
    if (Array.isArray(value)) {
        candidates = value.map((item) => (typeof item === 'string' ? item : ''));
    } else if (typeof value === 'string') {
        candidates = value.split(',');
    } else {
        return null;
    }
    const unique = new Set();
    for (const candidate of candidates) {
        const label = candidate.trim().toLowerCase();
        if (label) {
            unique.add(label);
        }
    }
    return unique.size > 0 ? [...unique] : null;
}
|
|
27
|
+
/**
 * Determine the URL for an imported item: its own `url` when present,
 * otherwise a YouTube watch URL built from `video_id`.
 *
 * @param {{ url?: unknown, video_id?: unknown }} input
 * @returns {string}
 * @throws {Error} When the item has neither a usable `url` nor `video_id`.
 */
function normalizeUrl(input) {
    const directUrl = toStringOrNull(input.url);
    if (directUrl) {
        return directUrl;
    }
    const videoId = toStringOrNull(input.video_id);
    if (!videoId) {
        throw new Error('Could not determine a video URL for one imported item.');
    }
    return `https://www.youtube.com/watch?v=${videoId}`;
}
|
|
36
|
+
/**
 * Turn one raw imported item into a canonical video record.
 *
 * The record id is the playlist item id, else the video id, else the URL.
 * Category labels come from `categories`, else `tags`, else the primary
 * category alone; domains come from `domains`, else the primary domain
 * alone. `liked_at` is taken from the item's `published_at` field.
 *
 * @param {object} input - Raw item from the imported archive.
 * @param {string} [importedAt] - Timestamp for `imported_at`; defaults to
 *   the current time in ISO format.
 * @returns {object} Normalised record.
 * @throws {Error} When no URL can be determined for the item.
 */
export function normalizeImportedVideo(input, importedAt = new Date().toISOString()) {
    const text = (key) => toStringOrNull(input[key]);
    const loweredText = (key) => text(key)?.toLowerCase() ?? null;

    const playlistItemId = text('playlist_item_id');
    const videoId = text('video_id');
    const url = normalizeUrl(input);

    const primaryCategory = loweredText('primary_category');
    const categories = toStringArray(input.categories)
        ?? toStringArray(input.tags)
        ?? (primaryCategory ? [primaryCategory] : null);

    const primaryDomain = loweredText('primary_domain');
    const domains = toStringArray(input.domains)
        ?? (primaryDomain ? [primaryDomain] : null);

    return {
        id: playlistItemId ?? videoId ?? url,
        video_id: videoId,
        playlist_item_id: playlistItemId,
        url,
        title: text('title') ?? '(untitled)',
        description: text('description'),
        channel_id: text('channel_id'),
        channel_title: text('channel_title'),
        liked_at: text('published_at'),
        video_published_at: text('video_published_at'),
        duration: text('duration'),
        privacy_status: text('privacy_status'),
        position: typeof input.position === 'number' ? input.position : null,
        categories,
        primary_category: primaryCategory,
        domains,
        primary_domain: primaryDomain,
        classification_reason: text('classification_reason'),
        classification_engine: text('classification_engine'),
        classification_model: text('classification_model'),
        classified_at: text('classified_at'),
        imported_at: importedAt,
    };
}
|
|
76
|
+
/**
 * Merge a freshly imported record into the one already archived.
 *
 * Incoming values win whenever they carry information; a null/undefined
 * incoming field (or the '(untitled)' placeholder title) falls back to the
 * existing value. This now also covers `video_id` and `playlist_item_id`,
 * which the bare `...incoming` spread previously allowed to be nulled out.
 *
 * @param {object} existing - Record currently in the archive.
 * @param {object} incoming - Normalised record from the new import.
 * @returns {object} Merged record; neither argument is mutated.
 */
export function mergeVideoRecords(existing, incoming) {
    return {
        ...existing,
        ...incoming,
        video_id: incoming.video_id ?? existing.video_id,
        playlist_item_id: incoming.playlist_item_id ?? existing.playlist_item_id,
        url: incoming.url || existing.url,
        // '(untitled)' is the normaliser's placeholder, not a real title.
        title: incoming.title !== '(untitled)' ? incoming.title : existing.title,
        description: incoming.description ?? existing.description,
        channel_id: incoming.channel_id ?? existing.channel_id,
        channel_title: incoming.channel_title ?? existing.channel_title,
        liked_at: incoming.liked_at ?? existing.liked_at,
        video_published_at: incoming.video_published_at ?? existing.video_published_at,
        duration: incoming.duration ?? existing.duration,
        privacy_status: incoming.privacy_status ?? existing.privacy_status,
        position: incoming.position ?? existing.position,
        categories: incoming.categories ?? existing.categories,
        primary_category: incoming.primary_category ?? existing.primary_category,
        domains: incoming.domains ?? existing.domains,
        primary_domain: incoming.primary_domain ?? existing.primary_domain,
        classification_reason: incoming.classification_reason ?? existing.classification_reason,
        classification_engine: incoming.classification_engine ?? existing.classification_engine,
        classification_model: incoming.classification_model ?? existing.classification_model,
        classified_at: incoming.classified_at ?? existing.classified_at,
        imported_at: incoming.imported_at ?? existing.imported_at,
    };
}
|
|
101
|
+
/**
 * Import a JSON array of videos from `filePath` into the local archive.
 *
 * Each item is normalised, merged by id with any record already archived,
 * written back to the JSONL archive, and the database index is rebuilt.
 *
 * @param {string} filePath - Path to a JSON file containing an array.
 * @returns {Promise<{ imported: number, total: number }>} Number of items in
 *   the file and number of records in the archive afterwards.
 * @throws {Error} When the file's top-level JSON value is not an array.
 */
export async function importVideoArchive(filePath) {
    const parsed = JSON.parse(fs.readFileSync(filePath, 'utf8'));
    if (!Array.isArray(parsed)) {
        throw new Error('Imported file must be a JSON array.');
    }
    // One timestamp is shared by every record of this import run.
    const importedAt = new Date().toISOString();
    const incoming = parsed.map((item) => normalizeImportedVideo(item, importedAt));
    const existing = await readVideoArchive();
    const byId = new Map();
    for (const archived of existing) {
        byId.set(archived.id, archived);
    }
    for (const fresh of incoming) {
        const prior = byId.get(fresh.id);
        if (prior) {
            byId.set(fresh.id, mergeVideoRecords(prior, fresh));
        } else {
            byId.set(fresh.id, fresh);
        }
    }
    const records = [...byId.values()];
    writeJsonLines(videosJsonlPath(), records);
    await buildIndex({ force: true });
    return { imported: incoming.length, total: records.length };
}
|