@tikoci/rosetta 0.4.2 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -3
- package/package.json +1 -1
- package/src/browse.ts +1234 -0
- package/src/db.ts +82 -0
- package/src/extract-devices.ts +14 -1
- package/src/extract-videos.test.ts +356 -0
- package/src/extract-videos.ts +734 -0
- package/src/mcp-http.test.ts +5 -5
- package/src/mcp.ts +61 -0
- package/src/query.test.ts +187 -1
- package/src/query.ts +51 -0
- package/src/release.test.ts +12 -0
package/src/db.ts
CHANGED
|
@@ -17,6 +17,10 @@
|
|
|
17
17
|
* devices_fts — FTS5 over product name, code, architecture, CPU
|
|
18
18
|
* changelogs — parsed changelog entries per RouterOS version
|
|
19
19
|
* changelogs_fts — FTS5 over category, description
|
|
20
|
+
* videos — MikroTik YouTube video metadata (title, description, channel, upload date, duration, has_chapters flag)
|
|
21
|
+
* videos_fts — FTS5 over title, description
|
|
22
|
+
* video_segments — transcript segments (one per chapter, or full video if no chapters)
|
|
23
|
+
* video_segments_fts — FTS5 over chapter_title, transcript
|
|
20
24
|
*/
|
|
21
25
|
|
|
22
26
|
import sqlite from "bun:sqlite";
|
|
@@ -346,6 +350,82 @@ export function initDb() {
|
|
|
346
350
|
INSERT INTO changelogs_fts(rowid, category, description)
|
|
347
351
|
VALUES (new.id, new.category, new.description);
|
|
348
352
|
END;`);
|
|
353
|
+
|
|
354
|
+
// -- Videos (MikroTik YouTube channel transcripts) --
|
|
355
|
+
|
|
356
|
+
db.run(`CREATE TABLE IF NOT EXISTS videos (
|
|
357
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
358
|
+
video_id TEXT NOT NULL UNIQUE,
|
|
359
|
+
title TEXT NOT NULL,
|
|
360
|
+
description TEXT,
|
|
361
|
+
channel TEXT,
|
|
362
|
+
upload_date TEXT,
|
|
363
|
+
duration_s INTEGER,
|
|
364
|
+
url TEXT,
|
|
365
|
+
view_count INTEGER,
|
|
366
|
+
like_count INTEGER,
|
|
367
|
+
has_chapters INTEGER NOT NULL DEFAULT 0
|
|
368
|
+
);`);
|
|
369
|
+
|
|
370
|
+
db.run(`CREATE VIRTUAL TABLE IF NOT EXISTS videos_fts USING fts5(
|
|
371
|
+
title, description,
|
|
372
|
+
content=videos,
|
|
373
|
+
content_rowid=id,
|
|
374
|
+
tokenize='porter unicode61'
|
|
375
|
+
);`);
|
|
376
|
+
|
|
377
|
+
db.run(`CREATE TRIGGER IF NOT EXISTS videos_ai AFTER INSERT ON videos BEGIN
|
|
378
|
+
INSERT INTO videos_fts(rowid, title, description)
|
|
379
|
+
VALUES (new.id, new.title, new.description);
|
|
380
|
+
END;`);
|
|
381
|
+
db.run(`CREATE TRIGGER IF NOT EXISTS videos_ad AFTER DELETE ON videos BEGIN
|
|
382
|
+
INSERT INTO videos_fts(videos_fts, rowid, title, description)
|
|
383
|
+
VALUES('delete', old.id, old.title, old.description);
|
|
384
|
+
END;`);
|
|
385
|
+
db.run(`CREATE TRIGGER IF NOT EXISTS videos_au AFTER UPDATE ON videos BEGIN
|
|
386
|
+
INSERT INTO videos_fts(videos_fts, rowid, title, description)
|
|
387
|
+
VALUES('delete', old.id, old.title, old.description);
|
|
388
|
+
INSERT INTO videos_fts(rowid, title, description)
|
|
389
|
+
VALUES (new.id, new.title, new.description);
|
|
390
|
+
END;`);
|
|
391
|
+
|
|
392
|
+
db.run(`CREATE INDEX IF NOT EXISTS idx_videos_upload_date ON videos(upload_date);`);
|
|
393
|
+
|
|
394
|
+
// -- Video segments (transcript chunks, one per chapter or one per video) --
|
|
395
|
+
|
|
396
|
+
db.run(`CREATE TABLE IF NOT EXISTS video_segments (
|
|
397
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
398
|
+
video_id INTEGER NOT NULL REFERENCES videos(id),
|
|
399
|
+
chapter_title TEXT,
|
|
400
|
+
start_s INTEGER NOT NULL DEFAULT 0,
|
|
401
|
+
end_s INTEGER,
|
|
402
|
+
transcript TEXT NOT NULL,
|
|
403
|
+
sort_order INTEGER NOT NULL
|
|
404
|
+
);`);
|
|
405
|
+
|
|
406
|
+
db.run(`CREATE VIRTUAL TABLE IF NOT EXISTS video_segments_fts USING fts5(
|
|
407
|
+
chapter_title, transcript,
|
|
408
|
+
content=video_segments,
|
|
409
|
+
content_rowid=id,
|
|
410
|
+
tokenize='porter unicode61'
|
|
411
|
+
);`);
|
|
412
|
+
|
|
413
|
+
db.run(`CREATE TRIGGER IF NOT EXISTS video_segs_ai AFTER INSERT ON video_segments BEGIN
|
|
414
|
+
INSERT INTO video_segments_fts(rowid, chapter_title, transcript)
|
|
415
|
+
VALUES (new.id, new.chapter_title, new.transcript);
|
|
416
|
+
END;`);
|
|
417
|
+
db.run(`CREATE TRIGGER IF NOT EXISTS video_segs_ad AFTER DELETE ON video_segments BEGIN
|
|
418
|
+
INSERT INTO video_segments_fts(video_segments_fts, rowid, chapter_title, transcript)
|
|
419
|
+
VALUES('delete', old.id, old.chapter_title, old.transcript);
|
|
420
|
+
END;`);
|
|
421
|
+
db.run(`CREATE TRIGGER IF NOT EXISTS video_segs_au AFTER UPDATE ON video_segments BEGIN
|
|
422
|
+
INSERT INTO video_segments_fts(video_segments_fts, rowid, chapter_title, transcript)
|
|
423
|
+
VALUES('delete', old.id, old.chapter_title, old.transcript);
|
|
424
|
+
INSERT INTO video_segments_fts(rowid, chapter_title, transcript)
|
|
425
|
+
VALUES (new.id, new.chapter_title, new.transcript);
|
|
426
|
+
END;`);
|
|
427
|
+
|
|
428
|
+
db.run(`CREATE INDEX IF NOT EXISTS idx_video_segs_video ON video_segments(video_id);`);
|
|
349
429
|
}
|
|
350
430
|
|
|
351
431
|
/**
|
|
@@ -375,6 +455,8 @@ export function getDbStats() {
|
|
|
375
455
|
changelogs: count("SELECT COUNT(*) AS c FROM changelogs"),
|
|
376
456
|
changelog_versions: count("SELECT COUNT(DISTINCT version) AS c FROM changelogs"),
|
|
377
457
|
ros_versions: count("SELECT COUNT(*) AS c FROM ros_versions"),
|
|
458
|
+
videos: count("SELECT COUNT(*) AS c FROM videos"),
|
|
459
|
+
video_segments: count("SELECT COUNT(*) AS c FROM video_segments"),
|
|
378
460
|
...(() => {
|
|
379
461
|
// Semantic version sort — SQL MIN/MAX is lexicographic ("7.10" < "7.9")
|
|
380
462
|
const versions = (db.prepare("SELECT version FROM ros_versions").all() as Array<{ version: string }>).map((r) => r.version);
|
package/src/extract-devices.ts
CHANGED
|
@@ -10,6 +10,19 @@
|
|
|
10
10
|
import { readFileSync } from "node:fs";
|
|
11
11
|
import { db, initDb } from "./db.ts";
|
|
12
12
|
|
|
13
|
+
/** Map of Unicode superscript/subscript digits → ASCII digits (e.g. ³→3, ²→2). */
|
|
14
|
+
const DIGIT_SUPER_SUB: Record<string, string> = {
|
|
15
|
+
"⁰": "0", "¹": "1", "²": "2", "³": "3", "⁴": "4",
|
|
16
|
+
"⁵": "5", "⁶": "6", "⁷": "7", "⁸": "8", "⁹": "9",
|
|
17
|
+
"₀": "0", "₁": "1", "₂": "2", "₃": "3", "₄": "4",
|
|
18
|
+
"₅": "5", "₆": "6", "₇": "7", "₈": "8", "₉": "9",
|
|
19
|
+
};
|
|
20
|
+
|
|
21
|
+
/** Normalize Unicode superscript/subscript digits to ASCII in product names. */
|
|
22
|
+
function normalizeSuperscripts(s: string): string {
|
|
23
|
+
return s.replace(/[⁰¹²³⁴⁵⁶⁷⁸⁹₀₁₂₃₄₅₆₇₈₉]/g, (c) => DIGIT_SUPER_SUB[c] ?? c);
|
|
24
|
+
}
|
|
25
|
+
|
|
13
26
|
const DEFAULT_CSV = "matrix/2026-03-25/matrix.csv";
|
|
14
27
|
const csvPath = process.argv[2] || DEFAULT_CSV;
|
|
15
28
|
|
|
@@ -141,7 +154,7 @@ const insertAll = db.transaction(() => {
|
|
|
141
154
|
continue;
|
|
142
155
|
}
|
|
143
156
|
|
|
144
|
-
const productName = f[0].trim();
|
|
157
|
+
const productName = normalizeSuperscripts(f[0].trim());
|
|
145
158
|
if (!productName) {
|
|
146
159
|
skipped++;
|
|
147
160
|
continue;
|
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* extract-videos.test.ts — Tests for extract-videos.ts failure modes and pure functions.
|
|
3
|
+
*
|
|
4
|
+
* Uses mock yt-dlp shell scripts to verify timeout/error/missing-VTT handling
|
|
5
|
+
* without hitting the network. The YTDLP_DEFAULT constant and the injectable
|
|
6
|
+
* `ytdlp` parameters on downloadTranscript / listPlaylist make this possible.
|
|
7
|
+
*
|
|
8
|
+
* Cache tests (saveCache / importCache / loadKnownBad / findLatestCache) use an
|
|
9
|
+
* in-memory SQLite seeded via initDb() — same pattern as query.test.ts.
|
|
10
|
+
*
|
|
11
|
+
* Pure-function tests (parseVtt / segmentTranscript) are in query.test.ts.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
// Set BEFORE any import that transitively loads db.ts
|
|
15
|
+
process.env.DB_PATH = ":memory:";
|
|
16
|
+
|
|
17
|
+
import { afterAll, beforeAll, describe, expect, test } from "bun:test";
|
|
18
|
+
import { chmodSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
|
|
19
|
+
import { tmpdir } from "node:os";
|
|
20
|
+
import { join } from "node:path";
|
|
21
|
+
|
|
22
|
+
// Dynamic imports guarantee that the DB_PATH env-var above wins over Bun's
|
|
23
|
+
// static-import hoisting — same pattern as query.test.ts.
|
|
24
|
+
const { db, initDb } = await import("./db.ts");
|
|
25
|
+
const { downloadTranscript, listPlaylist, saveCache, importCache, loadKnownBad, findLatestCache } =
|
|
26
|
+
await import("./extract-videos.ts");
|
|
27
|
+
|
|
28
|
+
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
29
|
+
|
|
30
|
+
/** Write a shell script to `path` and make it executable. */
|
|
31
|
+
function writeMock(path: string, script: string): void {
|
|
32
|
+
writeFileSync(path, `#!/bin/sh\n${script}\n`);
|
|
33
|
+
chmodSync(path, 0o755);
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
// ── Fixture setup ─────────────────────────────────────────────────────────────
|
|
38
|
+
|
|
39
|
+
let tmpBase: string;
|
|
40
|
+
let mockBin: string;
|
|
41
|
+
let downloadDir: string;
|
|
42
|
+
|
|
43
|
+
beforeAll(() => {
|
|
44
|
+
// Initialize the in-memory DB schema so cache functions can INSERT/SELECT
|
|
45
|
+
initDb();
|
|
46
|
+
|
|
47
|
+
tmpBase = join(tmpdir(), `rosetta-vid-test-${Date.now()}`);
|
|
48
|
+
mkdirSync(tmpBase, { recursive: true });
|
|
49
|
+
mockBin = join(tmpBase, "yt-dlp-mock");
|
|
50
|
+
downloadDir = join(tmpBase, "downloads");
|
|
51
|
+
mkdirSync(downloadDir, { recursive: true });
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
afterAll(() => {
|
|
55
|
+
rmSync(tmpBase, { recursive: true, force: true });
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
// ── downloadTranscript failure modes ─────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
describe("downloadTranscript", () => {
|
|
61
|
+
test("returns 'ok' when yt-dlp exits 0", () => {
|
|
62
|
+
writeMock(mockBin, "exit 0");
|
|
63
|
+
const result = downloadTranscript("vid1", downloadDir, mockBin, 10_000);
|
|
64
|
+
expect(result).toBe("ok");
|
|
65
|
+
}, 15_000);
|
|
66
|
+
|
|
67
|
+
test("returns 'error' when yt-dlp exits non-zero", () => {
|
|
68
|
+
writeMock(mockBin, "exit 1");
|
|
69
|
+
const result = downloadTranscript("vid1", downloadDir, mockBin);
|
|
70
|
+
expect(result).toBe("error");
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
test("returns 'error' when yt-dlp exits 2 (usage error)", () => {
|
|
74
|
+
writeMock(mockBin, "exit 2");
|
|
75
|
+
const result = downloadTranscript("vid1", downloadDir, mockBin);
|
|
76
|
+
expect(result).toBe("error");
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
test("returns 'timeout' when yt-dlp exceeds the timeout", () => {
|
|
80
|
+
// sleep 60 would block for a full minute; the very short timeout passed below must end the call first
|
|
81
|
+
writeMock(mockBin, "sleep 60");
|
|
82
|
+
const result = downloadTranscript("vid1", downloadDir, mockBin, 300); // 300ms
|
|
83
|
+
expect(result).toBe("timeout");
|
|
84
|
+
});
|
|
85
|
+
|
|
86
|
+
test("timeout does not leave zombie processes", async () => {
|
|
87
|
+
writeMock(mockBin, "sleep 60");
|
|
88
|
+
downloadTranscript("vid2", downloadDir, mockBin, 200);
|
|
89
|
+
// Give the OS a moment to reap the killed child
|
|
90
|
+
await Bun.sleep(100);
|
|
91
|
+
// Reaching this point only shows the call returned without hanging; reaping of the child is not asserted here
|
|
92
|
+
expect(true).toBe(true);
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
test("accepts info.json + VTT written by mock and returns 'ok'", () => {
|
|
96
|
+
// Mock that writes the expected output files
|
|
97
|
+
const videoId = "abc123";
|
|
98
|
+
const script = [
|
|
99
|
+
// Write a minimal info.json
|
|
100
|
+
`echo '{"id":"${videoId}","title":"Test","duration":300}' > "${join(downloadDir, `${videoId}.info.json`)}"`,
|
|
101
|
+
// Write a minimal VTT
|
|
102
|
+
`printf 'WEBVTT\\n\\n00:00:01.000 --> 00:00:05.000\\nHello world\\n' > "${join(downloadDir, `${videoId}.en.vtt`)}"`,
|
|
103
|
+
"exit 0",
|
|
104
|
+
].join("\n");
|
|
105
|
+
writeMock(mockBin, script);
|
|
106
|
+
|
|
107
|
+
const result = downloadTranscript(videoId, downloadDir, mockBin);
|
|
108
|
+
expect(result).toBe("ok");
|
|
109
|
+
|
|
110
|
+
// Files should be present (main() would clean them up; we don't call main() here)
|
|
111
|
+
const infoContent = readFileSync(join(downloadDir, `${videoId}.info.json`), "utf8");
|
|
112
|
+
expect(JSON.parse(infoContent).id).toBe(videoId);
|
|
113
|
+
});
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
// ── listPlaylist failure modes ────────────────────────────────────────────────
|
|
117
|
+
|
|
118
|
+
describe("listPlaylist", () => {
|
|
119
|
+
test("throws when yt-dlp exits non-zero", () => {
|
|
120
|
+
writeMock(mockBin, "exit 1");
|
|
121
|
+
expect(() => listPlaylist("https://example.com", mockBin)).toThrow(/failed/);
|
|
122
|
+
});
|
|
123
|
+
|
|
124
|
+
test("throws on timeout", () => {
|
|
125
|
+
writeMock(mockBin, "sleep 60");
|
|
126
|
+
expect(() => listPlaylist("https://example.com", mockBin, 300)).toThrow(/timed out/);
|
|
127
|
+
});
|
|
128
|
+
|
|
129
|
+
test("parses NDJSON output into entries", () => {
|
|
130
|
+
const ndjsonFile = join(tmpBase, "playlist.ndjson");
|
|
131
|
+
writeFileSync(
|
|
132
|
+
ndjsonFile,
|
|
133
|
+
`${[
|
|
134
|
+
JSON.stringify({ id: "abc", title: "RouterOS VLAN Tutorial", duration: 600 }),
|
|
135
|
+
JSON.stringify({ id: "def", title: "Firewall Filter Guide", duration: 900 }),
|
|
136
|
+
JSON.stringify({ id: "ghi", title: "BGP Configuration", duration: 450 }),
|
|
137
|
+
].join("\n")}\n`,
|
|
138
|
+
);
|
|
139
|
+
writeMock(mockBin, `cat "${ndjsonFile}"`);
|
|
140
|
+
|
|
141
|
+
const result = listPlaylist("https://example.com", mockBin);
|
|
142
|
+
expect(result).toHaveLength(3);
|
|
143
|
+
expect(result[0]).toEqual({ id: "abc", title: "RouterOS VLAN Tutorial", duration: 600 });
|
|
144
|
+
expect(result[1].id).toBe("def");
|
|
145
|
+
expect(result[2].duration).toBe(450);
|
|
146
|
+
});
|
|
147
|
+
|
|
148
|
+
test("skips malformed NDJSON lines gracefully", () => {
|
|
149
|
+
const ndjsonFile = join(tmpBase, "playlist-bad.ndjson");
|
|
150
|
+
writeFileSync(
|
|
151
|
+
ndjsonFile,
|
|
152
|
+
`${[
|
|
153
|
+
JSON.stringify({ id: "abc", title: "Good Video", duration: 300 }),
|
|
154
|
+
"not valid json at all",
|
|
155
|
+
JSON.stringify({ id: "def", title: "Another Good One", duration: 500 }),
|
|
156
|
+
'{"missing_id": true}', // no id field
|
|
157
|
+
].join("\n")}\n`,
|
|
158
|
+
);
|
|
159
|
+
writeMock(mockBin, `cat "${ndjsonFile}"`);
|
|
160
|
+
|
|
161
|
+
const result = listPlaylist("https://example.com", mockBin);
|
|
162
|
+
expect(result).toHaveLength(2); // only the two valid entries with id+title
|
|
163
|
+
expect(result[0].id).toBe("abc");
|
|
164
|
+
expect(result[1].id).toBe("def");
|
|
165
|
+
});
|
|
166
|
+
|
|
167
|
+
test("returns empty array for empty output", () => {
|
|
168
|
+
writeMock(mockBin, "exit 0"); // exits 0 with no stdout
|
|
169
|
+
const result = listPlaylist("https://example.com", mockBin);
|
|
170
|
+
expect(result).toHaveLength(0);
|
|
171
|
+
});
|
|
172
|
+
});
|
|
173
|
+
|
|
174
|
+
// ── YTDLP_DEFAULT env var override ───────────────────────────────────────────
|
|
175
|
+
|
|
176
|
+
describe("YTDLP_DEFAULT", () => {
|
|
177
|
+
test("is set from YTDLP env var if provided", async () => {
|
|
178
|
+
// import is cached, so we can't re-import with a different env var,
|
|
179
|
+
// but we can verify the module exports YTDLP_DEFAULT for inspection
|
|
180
|
+
const { YTDLP_DEFAULT } = await import("./extract-videos.ts");
|
|
181
|
+
// In test env YTDLP is not set, so it defaults to "yt-dlp"
|
|
182
|
+
// (or whatever the test runner's env has — just ensure it's a string)
|
|
183
|
+
expect(typeof YTDLP_DEFAULT).toBe("string");
|
|
184
|
+
expect(YTDLP_DEFAULT.length).toBeGreaterThan(0);
|
|
185
|
+
});
|
|
186
|
+
});
|
|
187
|
+
|
|
188
|
+
// ── Cache: loadKnownBad ───────────────────────────────────────────────────────
|
|
189
|
+
|
|
190
|
+
describe("loadKnownBad", () => {
|
|
191
|
+
test("returns empty Set for non-existent file", () => {
|
|
192
|
+
const result = loadKnownBad(join(tmpBase, "nonexistent-known-bad.json"));
|
|
193
|
+
expect(result.size).toBe(0);
|
|
194
|
+
});
|
|
195
|
+
|
|
196
|
+
test("returns Set of IDs from valid JSON", () => {
|
|
197
|
+
const path = join(tmpBase, "known-bad.json");
|
|
198
|
+
writeFileSync(path, JSON.stringify({ abc123: "non-English: Russian", def456: "private video" }), "utf8");
|
|
199
|
+
const result = loadKnownBad(path);
|
|
200
|
+
expect(result.size).toBe(2);
|
|
201
|
+
expect(result.has("abc123")).toBe(true);
|
|
202
|
+
expect(result.has("def456")).toBe(true);
|
|
203
|
+
});
|
|
204
|
+
|
|
205
|
+
test("ignores keys starting with _", () => {
|
|
206
|
+
const path = join(tmpBase, "known-bad-comment.json");
|
|
207
|
+
writeFileSync(path, JSON.stringify({ _comment: "metadata", vid1: "reason" }), "utf8");
|
|
208
|
+
const result = loadKnownBad(path);
|
|
209
|
+
expect(result.has("_comment")).toBe(false);
|
|
210
|
+
expect(result.has("vid1")).toBe(true);
|
|
211
|
+
expect(result.size).toBe(1);
|
|
212
|
+
});
|
|
213
|
+
|
|
214
|
+
test("returns empty Set for malformed JSON", () => {
|
|
215
|
+
const path = join(tmpBase, "known-bad-malformed.json");
|
|
216
|
+
writeFileSync(path, "not valid json { at all", "utf8");
|
|
217
|
+
const result = loadKnownBad(path);
|
|
218
|
+
expect(result.size).toBe(0);
|
|
219
|
+
});
|
|
220
|
+
});
|
|
221
|
+
|
|
222
|
+
// ── Cache: findLatestCache ────────────────────────────────────────────────────
|
|
223
|
+
|
|
224
|
+
describe("findLatestCache", () => {
|
|
225
|
+
let cacheRoot: string;
|
|
226
|
+
|
|
227
|
+
beforeAll(() => {
|
|
228
|
+
cacheRoot = join(tmpBase, "transcripts-find-test");
|
|
229
|
+
mkdirSync(cacheRoot, { recursive: true });
|
|
230
|
+
});
|
|
231
|
+
|
|
232
|
+
test("returns null when transcripts dir does not exist", () => {
|
|
233
|
+
const result = findLatestCache(join(tmpBase, "no-such-dir"));
|
|
234
|
+
expect(result).toBeNull();
|
|
235
|
+
});
|
|
236
|
+
|
|
237
|
+
test("returns null when transcripts dir has no date subdirs", () => {
|
|
238
|
+
const emptyRoot = join(tmpBase, "transcripts-empty");
|
|
239
|
+
mkdirSync(join(emptyRoot, "transcripts"), { recursive: true });
|
|
240
|
+
const result = findLatestCache(emptyRoot);
|
|
241
|
+
expect(result).toBeNull();
|
|
242
|
+
});
|
|
243
|
+
|
|
244
|
+
test("returns path to the most recent videos.ndjson", () => {
|
|
245
|
+
const t = join(tmpBase, "tc1");
|
|
246
|
+
mkdirSync(join(t, "transcripts", "2024-01-01"), { recursive: true });
|
|
247
|
+
mkdirSync(join(t, "transcripts", "2024-06-15"), { recursive: true });
|
|
248
|
+
mkdirSync(join(t, "transcripts", "2024-03-10"), { recursive: true });
|
|
249
|
+
writeFileSync(join(t, "transcripts", "2024-01-01", "videos.ndjson"), "", "utf8");
|
|
250
|
+
writeFileSync(join(t, "transcripts", "2024-06-15", "videos.ndjson"), "", "utf8");
|
|
251
|
+
writeFileSync(join(t, "transcripts", "2024-03-10", "videos.ndjson"), "", "utf8");
|
|
252
|
+
const result = findLatestCache(t);
|
|
253
|
+
expect(result).toContain("2024-06-15");
|
|
254
|
+
});
|
|
255
|
+
|
|
256
|
+
test("skips dirs without videos.ndjson", () => {
|
|
257
|
+
const t = join(tmpBase, "tc2");
|
|
258
|
+
mkdirSync(join(t, "transcripts", "2025-01-01"), { recursive: true });
|
|
259
|
+
mkdirSync(join(t, "transcripts", "2024-12-31"), { recursive: true });
|
|
260
|
+
// Only the older dir has the file
|
|
261
|
+
writeFileSync(join(t, "transcripts", "2024-12-31", "videos.ndjson"), "", "utf8");
|
|
262
|
+
const result = findLatestCache(t);
|
|
263
|
+
expect(result).toContain("2024-12-31");
|
|
264
|
+
});
|
|
265
|
+
});
|
|
266
|
+
|
|
267
|
+
// ── Cache: saveCache + importCache ────────────────────────────────────────────
|
|
268
|
+
|
|
269
|
+
describe("saveCache + importCache", () => {
|
|
270
|
+
let cacheDir: string;
|
|
271
|
+
const VIDEO_ID = "cache-test-vid1";
|
|
272
|
+
const VIDEO_ID_2 = "cache-test-vid2";
|
|
273
|
+
|
|
274
|
+
beforeAll(() => {
|
|
275
|
+
cacheDir = join(tmpBase, "cache-out");
|
|
276
|
+
mkdirSync(cacheDir, { recursive: true });
|
|
277
|
+
|
|
278
|
+
// Insert two test videos into the in-memory DB
|
|
279
|
+
db.run(`
|
|
280
|
+
INSERT OR REPLACE INTO videos (video_id, title, description, channel, upload_date, duration_s, url, view_count, like_count, has_chapters)
|
|
281
|
+
VALUES ('${VIDEO_ID}', 'Test Video One', 'A description', 'MikroTik', '20240101', 300, 'https://www.youtube.com/watch?v=${VIDEO_ID}', 1000, 50, 1)
|
|
282
|
+
`);
|
|
283
|
+
const vid1 = db.prepare("SELECT id FROM videos WHERE video_id = ?").get(VIDEO_ID) as { id: number };
|
|
284
|
+
db.run(`INSERT INTO video_segments (video_id, chapter_title, start_s, end_s, transcript, sort_order) VALUES (${vid1.id}, 'Intro', 0, 60, 'Hello world', 0)`);
|
|
285
|
+
db.run(`INSERT INTO video_segments (video_id, chapter_title, start_s, end_s, transcript, sort_order) VALUES (${vid1.id}, 'Setup', 60, 120, 'Now configure routeros', 1)`);
|
|
286
|
+
|
|
287
|
+
db.run(`
|
|
288
|
+
INSERT OR REPLACE INTO videos (video_id, title, description, channel, upload_date, duration_s, url, view_count, like_count, has_chapters)
|
|
289
|
+
VALUES ('${VIDEO_ID_2}', 'Test Video Two', NULL, 'MikroTik', '20240201', 180, 'https://www.youtube.com/watch?v=${VIDEO_ID_2}', 500, 20, 0)
|
|
290
|
+
`);
|
|
291
|
+
const vid2 = db.prepare("SELECT id FROM videos WHERE video_id = ?").get(VIDEO_ID_2) as { id: number };
|
|
292
|
+
db.run(`INSERT INTO video_segments (video_id, chapter_title, start_s, end_s, transcript, sort_order) VALUES (${vid2.id}, NULL, 0, NULL, 'Single segment content here', 0)`);
|
|
293
|
+
});
|
|
294
|
+
|
|
295
|
+
test("saveCache writes NDJSON with correct video count", () => {
|
|
296
|
+
const outPath = join(cacheDir, "videos.ndjson");
|
|
297
|
+
const count = saveCache(outPath);
|
|
298
|
+
expect(count).toBeGreaterThanOrEqual(2); // at least our two test videos
|
|
299
|
+
const content = readFileSync(outPath, "utf8");
|
|
300
|
+
const lines = content.split("\n").filter(Boolean);
|
|
301
|
+
expect(lines.length).toBe(count);
|
|
302
|
+
});
|
|
303
|
+
|
|
304
|
+
test("saveCache NDJSON contains correct video data", () => {
|
|
305
|
+
const outPath = join(cacheDir, "videos.ndjson");
|
|
306
|
+
const content = readFileSync(outPath, "utf8");
|
|
307
|
+
const lines = content.split("\n").filter(Boolean);
|
|
308
|
+
const vid1Entry = lines.map((l) => JSON.parse(l)).find((e: { video_id: string }) => e.video_id === VIDEO_ID);
|
|
309
|
+
expect(vid1Entry).toBeDefined();
|
|
310
|
+
expect(vid1Entry.title).toBe("Test Video One");
|
|
311
|
+
expect(vid1Entry.has_chapters).toBe(1);
|
|
312
|
+
expect(vid1Entry.segments).toHaveLength(2);
|
|
313
|
+
expect(vid1Entry.segments[0].chapter_title).toBe("Intro");
|
|
314
|
+
expect(vid1Entry.segments[1].transcript).toBe("Now configure routeros");
|
|
315
|
+
});
|
|
316
|
+
|
|
317
|
+
test("importCache is idempotent (skips existing videos)", () => {
|
|
318
|
+
const outPath = join(cacheDir, "videos.ndjson");
|
|
319
|
+
// The videos are already in the DB, so importing again should report imported = 0 and skipped >= 2
|
|
320
|
+
const result = importCache(outPath);
|
|
321
|
+
expect(result.imported).toBe(0);
|
|
322
|
+
expect(result.skipped).toBeGreaterThanOrEqual(2);
|
|
323
|
+
expect(result.knownBadSkipped).toBe(0);
|
|
324
|
+
});
|
|
325
|
+
|
|
326
|
+
test("importCache with force=true re-inserts existing videos", () => {
|
|
327
|
+
const outPath = join(cacheDir, "videos.ndjson");
|
|
328
|
+
const result = importCache(outPath, { force: true });
|
|
329
|
+
expect(result.imported).toBeGreaterThanOrEqual(2);
|
|
330
|
+
expect(result.skipped).toBe(0);
|
|
331
|
+
});
|
|
332
|
+
|
|
333
|
+
test("importCache skips known-bad IDs", () => {
|
|
334
|
+
// Write a small NDJSON with one known-bad video
|
|
335
|
+
const singlePath = join(cacheDir, "single.ndjson");
|
|
336
|
+
const entry = { video_id: "skipme", title: "Skip This", description: null, channel: null, upload_date: null, duration_s: 200, url: "https://www.youtube.com/watch?v=skipme", view_count: null, like_count: null, has_chapters: 0, segments: [] };
|
|
337
|
+
writeFileSync(singlePath, `${JSON.stringify(entry)}\n`, "utf8");
|
|
338
|
+
|
|
339
|
+
const knownBad = new Set(["skipme"]);
|
|
340
|
+
const result = importCache(singlePath, { knownBad });
|
|
341
|
+
expect(result.knownBadSkipped).toBe(1);
|
|
342
|
+
expect(result.imported).toBe(0);
|
|
343
|
+
|
|
344
|
+
// Verify it was NOT inserted
|
|
345
|
+
const row = db.prepare("SELECT id FROM videos WHERE video_id = 'skipme'").get();
|
|
346
|
+
expect(row).toBeNull();
|
|
347
|
+
});
|
|
348
|
+
|
|
349
|
+
test("importCache handles malformed NDJSON lines gracefully", () => {
|
|
350
|
+
const badPath = join(cacheDir, "bad.ndjson");
|
|
351
|
+
writeFileSync(badPath, `not json\n${JSON.stringify({ video_id: "validone", title: "OK", description: null, channel: null, upload_date: null, duration_s: 100, url: "https://www.youtube.com/watch?v=validone", view_count: null, like_count: null, has_chapters: 0, segments: [] })}\n`, "utf8");
|
|
352
|
+
const result = importCache(badPath);
|
|
353
|
+
// Should import 1 valid video, skip the malformed line
|
|
354
|
+
expect(result.imported + result.skipped).toBeGreaterThanOrEqual(1);
|
|
355
|
+
});
|
|
356
|
+
});
|