@pagepocket/plugin-yt-dlp 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+ export * from "./yt-dlp-plugin.js";
2
+ export { default as YtDlpPlugin } from "./yt-dlp-plugin.js";
package/dist/index.js ADDED
@@ -0,0 +1,2 @@
1
+ export * from "./yt-dlp-plugin.js";
2
+ export { default as YtDlpPlugin } from "./yt-dlp-plugin.js";
@@ -0,0 +1,3 @@
1
+ import type { FileTree, FileTreeDirectory, FileTreeEntry } from "@pagepocket/lib";
2
/**
 * Returns a new root directory object with `fileEntry` placed in the directory
 * named by `relPath`; `root` itself is not mutated.
 *
 * Leading slashes and empty path segments in `relPath` are ignored. Missing
 * intermediate directories are created, and an existing file entry whose `path`
 * equals the last segment of `relPath` is replaced. When `relPath` contains no
 * segments at all, `root` is returned unchanged.
 */
+ export declare const withInsertedFile: (root: FileTreeDirectory, relPath: string, fileEntry: FileTreeEntry) => FileTreeDirectory;
3
/** Creates a file tree whose root is a directory with an empty `path` and no entries. */
+ export declare const emptyFileTree: () => FileTree;
@@ -0,0 +1,39 @@
1
/**
 * Looks up the child directory called `name` directly inside `dir`.
 *
 * @param {{ entries: Array<{ kind: string, path: string }> }} dir Directory whose entries are searched.
 * @param {string} name Directory name to look for.
 * @returns {{ dir: object, entryIndex: number }} The existing child and its index in
 *   `dir.entries`, or a freshly created empty directory with `entryIndex` of -1.
 *   The fresh directory is NOT attached to `dir`; the caller decides where it goes.
 */
const ensureDirectory = (dir, name) => {
    const position = dir.entries.findIndex((entry) => entry.path === name && entry.kind === "directory");
    return position < 0
        ? { dir: { kind: "directory", path: name, entries: [] }, entryIndex: -1 }
        : { dir: dir.entries[position], entryIndex: position };
};
9
/**
 * Produces a new root directory in which `fileEntry` sits inside the directory
 * chain described by `relPath`. Directories along that chain are shallow-copied;
 * `root` is never mutated.
 *
 * @param {object} root Directory node to insert into.
 * @param {string} relPath Slash-separated path; leading slashes and empty segments are dropped.
 * @param {object} fileEntry Entry appended to the target directory.
 * @returns {object} The rebuilt root, or `root` itself when `relPath` has no segments.
 */
export const withInsertedFile = (root, relPath, fileEntry) => {
    const segments = relPath.replace(/^\/+/, "").split("/").filter(Boolean);
    if (segments.length === 0) {
        return root;
    }
    const lastIndex = segments.length - 1;
    const filename = segments[lastIndex];
    // `depth` counts how many directory segments have already been descended.
    const place = (node, depth) => {
        if (depth === lastIndex) {
            // Drop any file already stored under this name, then append the new entry.
            const kept = node.entries.filter((entry) => entry.kind !== "file" || entry.path !== filename);
            kept.push(fileEntry);
            return { ...node, entries: kept };
        }
        const { dir: child, entryIndex } = ensureDirectory(node, segments[depth]);
        const rebuiltChild = place(child, depth + 1);
        const siblings = [...node.entries];
        if (entryIndex < 0) {
            siblings.push(rebuiltChild);
        }
        else {
            siblings[entryIndex] = rebuiltChild;
        }
        return { ...node, entries: siblings };
    };
    return place(root, 0);
};
37
/**
 * Builds a brand-new, empty file tree.
 *
 * @returns {{ root: { kind: string, path: string, entries: Array } }} A tree whose
 *   root directory has an empty path and no entries. Each call returns fresh objects.
 */
export const emptyFileTree = () => {
    const root = { kind: "directory", path: "", entries: [] };
    return { root };
};
@@ -0,0 +1,19 @@
1
/** Per-run naming information produced by `YtDlpJobManager.createSetupValue`. */
+ type SetupValue = {
2
/** Timestamp string obtained from `formatCompactDateTime`. */
+ dateString: string;
3
/** Directory prefix for downloaded videos: `yt-videos/<dateString>`. */
+ dirPrefix: string;
4
+ };
5
/** One YouTube video to download, as discovered from an embed iframe. */
+ export type YoutubeJob = {
6
/** Video id taken from the path segment that follows `embed` in the iframe `src`. */
+ id: string;
7
/** Watch URL built from the id: `https://www.youtube.com/watch?v=<id>`. */
+ url: string;
8
/** Relative path the video is stored under: `<dirPrefix>/<id>.mp4`. */
+ relPath: string;
9
+ };
10
/**
 * Parses an iframe `src` value. When it points at a YouTube embed (an
 * `embed/<id>` path on a youtube.com or youtube-nocookie.com host) the video id
 * and the matching `https://www.youtube.com/watch?v=<id>` URL are returned;
 * otherwise, or when the value cannot be parsed as a URL, the result is `null`.
 */
+ export declare const parseYoutubeEmbedSrc: (src: string) => {
11
+ id: string;
12
+ url: string;
13
+ } | null;
14
/** Derives storage paths and download jobs for YouTube embeds found in a page. */
+ export declare class YtDlpJobManager {
15
/** Builds the setup value for `now` (defaults to the current date): a compact date string plus the `yt-videos/<dateString>` prefix. */
+ createSetupValue(now?: Date): SetupValue;
16
/** Returns `<setupValue.dirPrefix>/<videoId>.mp4`. */
+ buildVideoRelPath(setupValue: SetupValue, videoId: string): string;
17
/** Scans iframe `src` attributes in `htmlString` and returns one job per distinct YouTube video id, in order of first appearance. */
+ discoverJobsFromHtml(htmlString: string, setupValue: SetupValue): YoutubeJob[];
18
+ }
19
+ export type { SetupValue };
@@ -0,0 +1,65 @@
1
+ import { formatCompactDateTime } from "@pagepocket/shared";
2
/** Registrable domains that serve the YouTube embed player. */
const YOUTUBE_EMBED_DOMAINS = ["youtube.com", "youtube-nocookie.com"];
/** `/embed/<name>` segments that select a playlist or a channel live stream, not a single video. */
const NON_VIDEO_EMBED_NAMES = new Set(["videoseries", "live_stream"]);
/** A video id is later used as a file name and inside an HTML attribute, so only URL/file-safe characters are accepted. */
const VIDEO_ID_PATTERN = /^[A-Za-z0-9_-]+$/;
/** Absolute http(s) URL or protocol-relative (`//host/...`) URL. */
const ABSOLUTE_OR_PROTOCOL_RELATIVE = /^\s*(?:https?:)?\/\//i;
/**
 * Extracts the video id from a YouTube embed iframe `src`.
 *
 * @param {string} src Raw `src` attribute value.
 * @returns {{ id: string, url: string } | null} The id and its canonical watch URL,
 *   or `null` when `src` is not a single-video YouTube embed.
 */
export const parseYoutubeEmbedSrc = (src) => {
    // A page-relative src ("/embed/x") belongs to the embedding site, not to YouTube,
    // so it must not be resolved against the YouTube base URL below.
    if (typeof src !== "string" || !ABSOLUTE_OR_PROTOCOL_RELATIVE.test(src)) {
        return null;
    }
    let url;
    try {
        // The base URL only supplies the scheme for protocol-relative values.
        url = new URL(src, "https://www.youtube.com");
    }
    catch {
        return null;
    }
    const host = url.hostname;
    // Match the domain itself or a true subdomain; a bare suffix test would also
    // accept look-alike hosts such as "notyoutube.com".
    const isYoutubeHost = YOUTUBE_EMBED_DOMAINS.some((domain) => host === domain || host.endsWith(`.${domain}`));
    if (!isYoutubeHost) {
        return null;
    }
    const parts = url.pathname.split("/").filter(Boolean);
    const embedIndex = parts.indexOf("embed");
    if (embedIndex === -1) {
        return null;
    }
    const id = parts[embedIndex + 1];
    if (!id || NON_VIDEO_EMBED_NAMES.has(id) || !VIDEO_ID_PATTERN.test(id)) {
        return null;
    }
    return { id, url: `https://www.youtube.com/watch?v=${id}` };
};
24
/**
 * Removes jobs whose `id` has already been seen.
 *
 * @param {Iterable<{ id: string }>} jobs Jobs in discovery order.
 * @returns {Array<{ id: string }>} The first job for every distinct id, in the
 *   order those ids first appeared.
 */
const uniqueById = (jobs) => {
    // A Map iterates in insertion order, which keeps first-occurrence ordering.
    const firstById = new Map();
    for (const candidate of jobs) {
        if (!firstById.has(candidate.id)) {
            firstById.set(candidate.id, candidate);
        }
    }
    return [...firstById.values()];
};
36
/**
 * Matches an `<iframe ...>` start tag and captures its `src` value, whether it is
 * double-quoted (group 1), single-quoted (group 2) or unquoted (group 3).
 * Requiring whitespace directly before `src` keeps look-alike attributes such as
 * `data-src` from being mistaken for the real one.
 */
const IFRAME_SRC_PATTERN = /<iframe\b[^>]*?\ssrc\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))[^>]*>/gi;
/** Derives storage paths and download jobs for YouTube embeds found in a page. */
export class YtDlpJobManager {
    /**
     * @param {Date} [now] Moment used for the directory name; defaults to the current date.
     * @returns {{ dateString: string, dirPrefix: string }} The compact date string and
     *   the `yt-videos/<dateString>` directory prefix derived from it.
     */
    createSetupValue(now = new Date()) {
        const dateString = formatCompactDateTime(now);
        const dirPrefix = `yt-videos/${dateString}`;
        return { dateString, dirPrefix };
    }
    /**
     * @param {{ dirPrefix: string }} setupValue Value returned by `createSetupValue`.
     * @param {string} videoId YouTube video id.
     * @returns {string} `<dirPrefix>/<videoId>.mp4`.
     */
    buildVideoRelPath(setupValue, videoId) {
        return `${setupValue.dirPrefix}/${videoId}.mp4`;
    }
    /**
     * Scans the iframes in `htmlString` and builds one download job per distinct
     * YouTube video, in order of first appearance.
     *
     * @param {string} htmlString HTML markup to scan.
     * @param {{ dirPrefix: string }} setupValue Value returned by `createSetupValue`.
     * @returns {Array<{ id: string, url: string, relPath: string }>} Deduplicated jobs.
     */
    discoverJobsFromHtml(htmlString, setupValue) {
        const jobs = [];
        // matchAll works on a clone of the regex, so the shared /g pattern keeps no state between calls.
        for (const match of htmlString.matchAll(IFRAME_SRC_PATTERN)) {
            const src = match[1] ?? match[2] ?? match[3];
            if (!src) {
                continue;
            }
            const parsed = parseYoutubeEmbedSrc(src);
            if (!parsed) {
                continue;
            }
            jobs.push({
                id: parsed.id,
                url: parsed.url,
                relPath: this.buildVideoRelPath(setupValue, parsed.id)
            });
        }
        return uniqueById(jobs);
    }
}
@@ -0,0 +1,2 @@
1
+ import type { YoutubeJob } from "./yt-dlp-job-manager.js";
2
/**
 * Downloads the video of every job and resolves to a map from job id to the
 * downloaded file's bytes. A job whose output file cannot be found after the
 * download has no entry in the map.
 */
+ export declare const downloadVideosAsBytes: (jobs: YoutubeJob[]) => Promise<Map<string, Uint8Array<ArrayBufferLike>>>;
@@ -0,0 +1,33 @@
1
/**
 * Downloads every job's video with yt-dlp and returns the file contents in memory.
 *
 * Each job is downloaded into its own temporary directory, which is removed
 * afterwards whether or not the download succeeded. A job that fails is logged
 * and skipped so the videos that did download are still returned.
 *
 * @param {Array<{ id: string, url: string }>} jobs Videos to download.
 * @returns {Promise<Map<string, Uint8Array>>} Bytes keyed by job id; jobs that
 *   failed or produced no output file are absent.
 * @throws When the yt-dlp / FFmpeg helper binaries cannot be prepared.
 */
export const downloadVideosAsBytes = async (jobs) => {
    const bytesById = new Map();
    if (jobs.length === 0) {
        // Nothing to fetch: skip loading the library and downloading its binaries.
        return bytesById;
    }
    const { YtDlp, helpers } = await import("ytdlp-nodejs");
    const fs = await import("node:fs/promises");
    const pathMod = await import("node:path");
    const os = await import("node:os");
    const ytdlp = new YtDlp();
    await helpers.downloadYtDlp();
    await helpers.downloadFFmpeg();
    // Downloads one job into a private temp directory and records its bytes.
    const downloadOne = async (job) => {
        const tmpDir = await fs.mkdtemp(pathMod.join(os.tmpdir(), "pagepocket-ytdlp-"));
        try {
            const outTemplate = pathMod.join(tmpDir, "%(id)s.%(ext)s");
            await ytdlp.downloadAsync(job.url, {
                format: { filter: "mergevideo", quality: "highest", type: "mp4" },
                output: outTemplate
            });
            const entries = await fs.readdir(tmpDir);
            const prefix = `${job.id}.`;
            // Prefer the merged mp4; fall back to whatever yt-dlp wrote for this id.
            const filename = entries.find((e) => e === `${prefix}mp4`) ??
                entries.find((e) => e.startsWith(prefix));
            if (!filename) {
                return;
            }
            const bytes = await fs.readFile(pathMod.join(tmpDir, filename));
            bytesById.set(job.id, new Uint8Array(bytes));
        }
        finally {
            // Best-effort cleanup; a leftover temp directory must not fail the job.
            await fs.rm(tmpDir, { recursive: true, force: true }).catch(() => { });
        }
    };
    // allSettled: one failing video must not discard the ones that succeeded.
    const outcomes = await Promise.allSettled(jobs.map(downloadOne));
    outcomes.forEach((outcome, index) => {
        if (outcome.status === "rejected") {
            console.error(`yt-dlp: failed to download ${jobs[index].url}`, outcome.reason);
        }
    });
    return bytesById;
};
@@ -0,0 +1,9 @@
1
+ import type { Plugin, PluginHost, UnitContributeContext, UnitPatch, UnitRuntime } from "@pagepocket/lib";
2
+ import { type SetupValue } from "./utils/yt-dlp-job-manager.js";
3
/**
 * PagePocket plugin that swaps YouTube embed iframes for `<video>` elements and
 * contributes the downloaded video files.
 */
+ export default class YtDlpPlugin implements Plugin<SetupValue> {
4
+ readonly name = "plugin:yt-dlp";
5
+ static readonly unitId = "buildSnapshot";
6
+ private jobManager;
7
/** Contributes the iframe-replacement element rule to `host.elements` and returns the setup value used for video paths. */
+ setup(host: PluginHost): SetupValue;
8
/** Discovers YouTube embeds in `ctx.value.html`, downloads them and returns them as `files`; resolves to an empty patch when there is nothing to add or the download fails. */
+ contribute(ctx: UnitContributeContext, _rt: UnitRuntime): Promise<UnitPatch>;
9
+ }
@@ -0,0 +1,81 @@
1
+ import { emptyFileTree, withInsertedFile } from "./utils/vtree.js";
2
+ import { YtDlpJobManager, parseYoutubeEmbedSrc } from "./utils/yt-dlp-job-manager.js";
3
+ import { downloadVideosAsBytes } from "./utils/ytdlp.js";
4
/**
 * Assembles a file tree holding every job whose bytes were downloaded.
 *
 * @param {{ jobs: Array<{ id: string, relPath: string }>, bytesById: Map<string, Uint8Array> }} input
 *   The discovered jobs and the downloaded bytes keyed by job id.
 * @returns {{ root: object }} Tree with one `bytes`-backed file entry per job
 *   that has an entry in `bytesById`; other jobs are skipped.
 */
const buildFilesForVideos = (input) => {
    let tree = emptyFileTree().root;
    for (const { id, relPath } of input.jobs) {
        const data = input.bytesById.get(id);
        if (data) {
            // The entry is named after the last path segment of relPath.
            const basename = relPath.split("/").pop() ?? `${id}.mp4`;
            tree = withInsertedFile(tree, relPath, {
                kind: "file",
                path: basename,
                source: { kind: "bytes", data }
            });
        }
    }
    return { root: tree };
};
21
/**
 * PagePocket plugin that swaps YouTube embed iframes for `<video>` elements and
 * contributes the downloaded video files.
 */
class YtDlpPlugin {
    constructor() {
        this.name = "plugin:yt-dlp";
        this.jobManager = new YtDlpJobManager();
    }
    /**
     * Passes one element rule to `host.elements.contribute`; the rule rewrites
     * YouTube embed iframes so they point at the locally stored video.
     *
     * @param {object} host Plugin host exposing `elements.contribute`.
     * @returns {{ dateString: string, dirPrefix: string }} Setup value later read from `ctx.setupValue`.
     */
    setup(host) {
        const setupValue = this.jobManager.createSetupValue();
        // Returns a replacement action for a matched iframe, or undefined to leave it alone.
        const replaceYoutubeEmbed = async (ctx) => {
            const src = ctx.$el.attr("src");
            const parsed = src ? parseYoutubeEmbedSrc(src) : null;
            if (!parsed) {
                return;
            }
            const videoPath = this.jobManager.buildVideoRelPath(setupValue, parsed.id);
            return {
                type: "replaceWithHtml",
                html: `<video controls src="/${videoPath}"></video>`
            };
        };
        const owner = { type: "plugin", name: this.name };
        host.elements.contribute(owner, [
            {
                name: "yt-dlp:replace-youtube-embeds",
                query: "iframe[src*='youtube.com/embed'], iframe[src*='youtube-nocookie.com/embed']",
                run: replaceYoutubeEmbed
            }
        ]);
        return setupValue;
    }
    /**
     * Downloads the videos embedded in the page HTML and returns them as files.
     *
     * @param {object} ctx Context carrying `setupValue` and `value.html.htmlString`.
     * @param {object} _rt Unused.
     * @returns {Promise<object>} `{ files }` when at least one video was downloaded,
     *   otherwise an empty patch. Download errors are logged and not rethrown.
     */
    async contribute(ctx, _rt) {
        const setupValue = ctx.setupValue;
        if (!setupValue) {
            return {};
        }
        const htmlString = ctx.value.html?.htmlString;
        if (!htmlString) {
            return {};
        }
        const jobs = this.jobManager.discoverJobsFromHtml(htmlString, setupValue);
        if (jobs.length === 0) {
            return {};
        }
        try {
            const bytesById = await downloadVideosAsBytes(jobs);
            return bytesById.size === 0
                ? {}
                : { files: buildFilesForVideos({ jobs, bytesById }) };
        }
        catch (e) {
            console.error(e);
            return {};
        }
    }
}
YtDlpPlugin.unitId = "buildSnapshot";
81
+ export default YtDlpPlugin;
package/package.json ADDED
@@ -0,0 +1,25 @@
1
+ {
2
+ "name": "@pagepocket/plugin-yt-dlp",
3
+ "version": "0.8.0",
4
+ "description": "PagePocket plugin: download YouTube embeds and replace with <video>",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "types": "dist/index.d.ts",
8
+ "files": [
9
+ "dist"
10
+ ],
11
+ "license": "ISC",
12
+ "dependencies": {
13
+ "ytdlp-nodejs": "^3.4.2",
14
+ "@pagepocket/lib": "0.8.0",
15
+ "@pagepocket/shared": "0.8.0",
16
+ "@pagepocket/contracts": "0.8.0"
17
+ },
18
+ "devDependencies": {
19
+ "typescript": "^5.4.5"
20
+ },
21
+ "scripts": {
22
+ "build": "tsc -p tsconfig.json",
23
+ "test": "node -e \"process.exit(0)\""
24
+ }
25
+ }