@oh-my-pi/pi-coding-agent 3.24.0 → 3.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -0
- package/package.json +4 -4
- package/src/core/custom-commands/bundled/wt/index.ts +3 -0
- package/src/core/sdk.ts +7 -0
- package/src/core/tools/complete.ts +129 -0
- package/src/core/tools/index.test.ts +9 -1
- package/src/core/tools/index.ts +18 -5
- package/src/core/tools/jtd-to-json-schema.ts +252 -0
- package/src/core/tools/output.ts +125 -14
- package/src/core/tools/read.ts +4 -4
- package/src/core/tools/task/artifacts.ts +6 -9
- package/src/core/tools/task/executor.ts +189 -24
- package/src/core/tools/task/index.ts +23 -18
- package/src/core/tools/task/name-generator.ts +1577 -0
- package/src/core/tools/task/render.ts +137 -8
- package/src/core/tools/task/types.ts +26 -5
- package/src/core/tools/task/worker-protocol.ts +1 -0
- package/src/core/tools/task/worker.ts +136 -14
- package/src/core/tools/web-fetch-handlers/academic.test.ts +239 -0
- package/src/core/tools/web-fetch-handlers/artifacthub.ts +210 -0
- package/src/core/tools/web-fetch-handlers/arxiv.ts +84 -0
- package/src/core/tools/web-fetch-handlers/aur.ts +171 -0
- package/src/core/tools/web-fetch-handlers/biorxiv.ts +136 -0
- package/src/core/tools/web-fetch-handlers/bluesky.ts +277 -0
- package/src/core/tools/web-fetch-handlers/brew.ts +173 -0
- package/src/core/tools/web-fetch-handlers/business.test.ts +82 -0
- package/src/core/tools/web-fetch-handlers/cheatsh.ts +73 -0
- package/src/core/tools/web-fetch-handlers/chocolatey.ts +153 -0
- package/src/core/tools/web-fetch-handlers/coingecko.ts +179 -0
- package/src/core/tools/web-fetch-handlers/crates-io.ts +123 -0
- package/src/core/tools/web-fetch-handlers/dev-platforms.test.ts +254 -0
- package/src/core/tools/web-fetch-handlers/devto.ts +173 -0
- package/src/core/tools/web-fetch-handlers/discogs.ts +303 -0
- package/src/core/tools/web-fetch-handlers/dockerhub.ts +156 -0
- package/src/core/tools/web-fetch-handlers/documentation.test.ts +85 -0
- package/src/core/tools/web-fetch-handlers/finance-media.test.ts +144 -0
- package/src/core/tools/web-fetch-handlers/git-hosting.test.ts +272 -0
- package/src/core/tools/web-fetch-handlers/github-gist.ts +64 -0
- package/src/core/tools/web-fetch-handlers/github.ts +424 -0
- package/src/core/tools/web-fetch-handlers/gitlab.ts +444 -0
- package/src/core/tools/web-fetch-handlers/go-pkg.ts +271 -0
- package/src/core/tools/web-fetch-handlers/hackage.ts +89 -0
- package/src/core/tools/web-fetch-handlers/hackernews.ts +208 -0
- package/src/core/tools/web-fetch-handlers/hex.ts +121 -0
- package/src/core/tools/web-fetch-handlers/huggingface.ts +385 -0
- package/src/core/tools/web-fetch-handlers/iacr.ts +82 -0
- package/src/core/tools/web-fetch-handlers/index.ts +69 -0
- package/src/core/tools/web-fetch-handlers/lobsters.ts +186 -0
- package/src/core/tools/web-fetch-handlers/mastodon.ts +302 -0
- package/src/core/tools/web-fetch-handlers/maven.ts +147 -0
- package/src/core/tools/web-fetch-handlers/mdn.ts +174 -0
- package/src/core/tools/web-fetch-handlers/media.test.ts +138 -0
- package/src/core/tools/web-fetch-handlers/metacpan.ts +247 -0
- package/src/core/tools/web-fetch-handlers/npm.ts +107 -0
- package/src/core/tools/web-fetch-handlers/nuget.ts +201 -0
- package/src/core/tools/web-fetch-handlers/nvd.ts +238 -0
- package/src/core/tools/web-fetch-handlers/opencorporates.ts +273 -0
- package/src/core/tools/web-fetch-handlers/openlibrary.ts +313 -0
- package/src/core/tools/web-fetch-handlers/osv.ts +184 -0
- package/src/core/tools/web-fetch-handlers/package-managers-2.test.ts +199 -0
- package/src/core/tools/web-fetch-handlers/package-managers.test.ts +171 -0
- package/src/core/tools/web-fetch-handlers/package-registries.test.ts +259 -0
- package/src/core/tools/web-fetch-handlers/packagist.ts +170 -0
- package/src/core/tools/web-fetch-handlers/pub-dev.ts +185 -0
- package/src/core/tools/web-fetch-handlers/pubmed.ts +174 -0
- package/src/core/tools/web-fetch-handlers/pypi.ts +125 -0
- package/src/core/tools/web-fetch-handlers/readthedocs.ts +122 -0
- package/src/core/tools/web-fetch-handlers/reddit.ts +100 -0
- package/src/core/tools/web-fetch-handlers/repology.ts +257 -0
- package/src/core/tools/web-fetch-handlers/research.test.ts +107 -0
- package/src/core/tools/web-fetch-handlers/rfc.ts +205 -0
- package/src/core/tools/web-fetch-handlers/rubygems.ts +112 -0
- package/src/core/tools/web-fetch-handlers/sec-edgar.ts +269 -0
- package/src/core/tools/web-fetch-handlers/security.test.ts +103 -0
- package/src/core/tools/web-fetch-handlers/semantic-scholar.ts +190 -0
- package/src/core/tools/web-fetch-handlers/social-extended.test.ts +192 -0
- package/src/core/tools/web-fetch-handlers/social.test.ts +259 -0
- package/src/core/tools/web-fetch-handlers/spotify.ts +218 -0
- package/src/core/tools/web-fetch-handlers/stackexchange.test.ts +120 -0
- package/src/core/tools/web-fetch-handlers/stackoverflow.ts +123 -0
- package/src/core/tools/web-fetch-handlers/standards.test.ts +122 -0
- package/src/core/tools/web-fetch-handlers/terraform.ts +296 -0
- package/src/core/tools/web-fetch-handlers/tldr.ts +47 -0
- package/src/core/tools/web-fetch-handlers/twitter.ts +84 -0
- package/src/core/tools/web-fetch-handlers/types.ts +163 -0
- package/src/core/tools/web-fetch-handlers/utils.ts +91 -0
- package/src/core/tools/web-fetch-handlers/vimeo.ts +152 -0
- package/src/core/tools/web-fetch-handlers/wikidata.ts +349 -0
- package/src/core/tools/web-fetch-handlers/wikipedia.test.ts +73 -0
- package/src/core/tools/web-fetch-handlers/wikipedia.ts +91 -0
- package/src/core/tools/web-fetch-handlers/youtube.test.ts +198 -0
- package/src/core/tools/web-fetch-handlers/youtube.ts +319 -0
- package/src/core/tools/web-fetch.ts +152 -1324
- package/src/prompts/task.md +14 -50
- package/src/prompts/tools/output.md +2 -1
- package/src/prompts/tools/task.md +3 -1
- package/src/utils/tools-manager.ts +110 -8
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
import { describe, expect, it } from "bun:test";
|
|
2
|
+
import { handleYouTube } from "./youtube";
|
|
3
|
+
|
|
4
|
+
// These tests hit the real network (and may invoke yt-dlp), so they only run when
// WEB_FETCH_INTEGRATION is set in the environment.
const SKIP = !process.env.WEB_FETCH_INTEGRATION;

describe.skipIf(SKIP)("handleYouTube", () => {
	// Rick Astley's "Never Gonna Give You Up" - a stable, well-known video
	const VIDEO_ID = "dQw4w9WgXcQ";
	const WATCH_URL = `https://www.youtube.com/watch?v=${VIDEO_ID}`;
	// Second argument handed to the handler, and the per-test timeout given to bun:test.
	const HANDLER_TIMEOUT = 30;
	const TEST_TIMEOUT = 30000;

	/** Fetch the reference video through its canonical watch URL. */
	const fetchReference = () => handleYouTube(WATCH_URL, HANDLER_TIMEOUT);

	it("returns null for non-YouTube URLs", async () => {
		expect(await handleYouTube("https://example.com", 10)).toBeNull();
	});

	it("returns null for invalid YouTube URLs", async () => {
		expect(await handleYouTube("https://youtube.com/invalid", 10)).toBeNull();
	});

	it("handles youtube.com/watch?v= format", async () => {
		const result = await fetchReference();
		expect(result).not.toBeNull();
		expect(result?.method).toMatch(/^youtube/);
		expect(result?.contentType).toBe("text/markdown");
		expect(result?.content).toContain("Video ID");
		expect(result?.content).toContain(VIDEO_ID);
	}, TEST_TIMEOUT);

	// Alternate URL shapes: each must resolve and mention the expected video ID.
	const alternateFormats: Array<[name: string, input: string, expectedId: string]> = [
		["handles youtu.be/ short format", `https://youtu.be/${VIDEO_ID}`, VIDEO_ID],
		// A stable YouTube Shorts video
		["handles youtube.com/shorts/ format", "https://www.youtube.com/shorts/jNQXAC9IVRw", "jNQXAC9IVRw"],
		["handles youtube.com/embed/ format", `https://www.youtube.com/embed/${VIDEO_ID}`, VIDEO_ID],
		["handles youtube.com/v/ format", `https://www.youtube.com/v/${VIDEO_ID}`, VIDEO_ID],
		["handles m.youtube.com mobile URLs", `https://m.youtube.com/watch?v=${VIDEO_ID}`, VIDEO_ID],
	];

	for (const [name, input, expectedId] of alternateFormats) {
		it(name, async () => {
			const result = await handleYouTube(input, HANDLER_TIMEOUT);
			expect(result).not.toBeNull();
			expect(result?.method).toMatch(/^youtube/);
			expect(result?.content).toContain(expectedId);
		}, TEST_TIMEOUT);
	}

	it("extracts video metadata when yt-dlp is available", async () => {
		const result = await fetchReference();
		expect(result).not.toBeNull();

		// yt-dlp available: metadata fields are rendered (duration, views, upload date are optional)
		if (result?.method === "youtube") {
			expect(result.content).toContain("Video ID");
			expect(result.content).toContain("Channel");
		}

		// yt-dlp unavailable: the result must say so
		if (result?.method === "youtube-no-ytdlp") {
			expect(result.content).toContain("yt-dlp could not be installed");
			expect(result.notes).toContain("yt-dlp installation failed");
		}
	}, TEST_TIMEOUT);

	it("handles videos with transcripts gracefully", async () => {
		// The reference video should have captions
		const result = await fetchReference();
		expect(result).not.toBeNull();

		if (result?.method === "youtube") {
			// Either a transcript section or the explicit "not available" notice must be present
			const markers = ["Transcript", "No transcript available"];
			const mentionsTranscript = markers.some((marker) => result.content.includes(marker));
			expect(mentionsTranscript).toBe(true);
		}
	}, TEST_TIMEOUT);

	it("handles videos without transcripts gracefully", async () => {
		// Many music videos lack captions, but that is not guaranteed; only verify
		// that the handler does not crash and still provides some info
		const result = await handleYouTube("https://www.youtube.com/watch?v=kJQP7kiw5Fk", HANDLER_TIMEOUT);
		expect(result).not.toBeNull();

		if (result?.method === "youtube") {
			// Basic metadata is still expected
			expect(result.content).toContain("Video ID");
		}
	}, TEST_TIMEOUT);

	it("returns appropriate response when yt-dlp is not available", async () => {
		// yt-dlp cannot be forced to be unavailable here, so verify that the return
		// structure is valid for whichever of the two cases actually occurs
		const result = await fetchReference();
		expect(result).not.toBeNull();

		// One of the two known methods
		expect(["youtube", "youtube-no-ytdlp"]).toContain(result!.method);

		// Fields required in both cases
		expect(result?.url).toBe("https://www.youtube.com/watch?v=dQw4w9WgXcQ");
		expect(result?.finalUrl).toContain("youtube.com");
		expect(result?.fetchedAt).toBeTruthy();
		expect(typeof result?.truncated).toBe("boolean");
		expect(Array.isArray(result?.notes)).toBe(true);
	}, TEST_TIMEOUT);

	it("normalizes video URLs to canonical format", async () => {
		// A short link must be reported with the canonical watch URL
		const result = await handleYouTube(`https://youtu.be/${VIDEO_ID}`, HANDLER_TIMEOUT);
		expect(result).not.toBeNull();
		expect(result?.finalUrl).toBe("https://www.youtube.com/watch?v=dQw4w9WgXcQ");
	}, TEST_TIMEOUT);

	it("handles playlist URLs by extracting video ID", async () => {
		const playlistUrl = "https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=PLrAXtmErZgOeiKm4sgNOknGvNjby9efdf";
		const result = await handleYouTube(playlistUrl, HANDLER_TIMEOUT);
		expect(result).not.toBeNull();
		expect(result?.content).toContain(VIDEO_ID);
	}, TEST_TIMEOUT);

	it("includes subtitle source information when available", async () => {
		const result = await fetchReference();

		if (result?.method === "youtube") {
			// At least one of the subtitle-source notes must have been recorded
			const sourceNotes = [
				"Using manual subtitles",
				"Using auto-generated captions",
				"No subtitles/captions available",
			];
			const noteCount = sourceNotes.filter((note) => result.notes.includes(note)).length;
			expect(noteCount).toBeGreaterThanOrEqual(1);
		}
	}, TEST_TIMEOUT);

	it("formats duration in human readable format", async () => {
		const result = await fetchReference();

		if (result?.method === "youtube" && result.content.includes("Duration")) {
			// Duration is rendered as M:SS or H:MM:SS
			expect(result.content).toMatch(/Duration.*\d+:\d{2}/);
		}
	}, TEST_TIMEOUT);

	it("formats view count in readable format", async () => {
		const result = await fetchReference();

		if (result?.method === "youtube" && result.content.includes("Views")) {
			// Views are rendered as a number with an optional magnitude suffix
			expect(result.content).toMatch(/Views.*\d+(\.\d+)?[KM]?/);
		}
	}, TEST_TIMEOUT);

	it("includes upload date when available", async () => {
		const result = await fetchReference();

		if (result?.method === "youtube" && result.content.includes("Uploaded")) {
			// Upload date is rendered as YYYY-MM-DD
			expect(result.content).toMatch(/Uploaded.*\d{4}-\d{2}-\d{2}/);
		}
	}, TEST_TIMEOUT);

	it("truncates long descriptions", async () => {
		const result = await fetchReference();

		if (result?.method === "youtube" && result.content.includes("Description")) {
			// The description gets its own section
			expect(result.content).toContain("## Description");
		}
	}, TEST_TIMEOUT);

	it("handles www prefix variations", async () => {
		const withWww = await fetchReference();
		const withoutWww = await handleYouTube(`https://youtube.com/watch?v=${VIDEO_ID}`, HANDLER_TIMEOUT);

		expect(withWww).not.toBeNull();
		expect(withoutWww).not.toBeNull();
		expect(withWww?.finalUrl).toBe(withoutWww?.finalUrl);
	}, TEST_TIMEOUT);
});
|
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
import { unlinkSync } from "node:fs";
|
|
2
|
+
import { tmpdir } from "node:os";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
import { ensureTool } from "../../../utils/tools-manager";
|
|
5
|
+
import type { RenderResult, SpecialHandler } from "./types";
|
|
6
|
+
import { finalizeOutput } from "./types";
|
|
7
|
+
|
|
8
|
+
/**
 * Run a command synchronously and capture its output.
 *
 * The command is spawned directly from an argv array (no shell), so arguments
 * are passed verbatim.
 *
 * @param cmd - Executable to run.
 * @param args - Arguments appended after `cmd`.
 * @param options - Optional settings:
 *   - `timeout`: milliseconds after which the child process is terminated.
 *   - `input`: data written to the child's stdin; when absent or empty, stdin is ignored.
 * @returns Captured `stdout` and `stderr` as strings (empty when nothing was captured)
 *   and `ok`, which is true only when the process exited with code 0.
 */
function exec(
	cmd: string,
	args: string[],
	options?: { timeout?: number; input?: string | Buffer },
): { stdout: string; stderr: string; ok: boolean } {
	const { timeout, input } = options ?? {};

	// Bun's stdin option takes raw bytes, not arbitrary strings, so encode string input.
	const stdin = input ? (typeof input === "string" ? Buffer.from(input) : input) : "ignore";

	const result = Bun.spawnSync([cmd, ...args], {
		stdin,
		stdout: "pipe",
		stderr: "pipe",
		// Forward the timeout so a hung child is killed instead of blocking forever.
		// A child killed this way does not exit with code 0, so `ok` is false.
		...(timeout !== undefined && timeout > 0 ? { timeout } : {}),
	});

	return {
		stdout: result.stdout?.toString() ?? "",
		stderr: result.stderr?.toString() ?? "",
		ok: result.exitCode === 0,
	};
}
|
|
27
|
+
|
|
28
|
+
interface YouTubeUrl {
	/** YouTube video identifier (11 characters from `[a-zA-Z0-9_-]`). */
	videoId: string;
	/** Value of the `list` query parameter, when a watch URL carried one. */
	playlistId?: string;
}

/**
 * Parse a YouTube URL into its components.
 *
 * Recognised forms (a leading `www.` is ignored):
 * - `youtube.com/watch?v=ID` and `m.youtube.com/watch?v=ID` (optionally with `&list=...`)
 * - `youtube.com/v/ID`, `youtube.com/embed/ID` (also on `m.youtube.com`)
 * - `youtube.com/shorts/ID`, `youtube.com/live/ID` (also on `m.youtube.com`)
 * - `youtu.be/ID`
 *
 * Every form requires a well-formed 11-character video ID. The ID is later
 * interpolated into a file path and a command argument, so malformed values are
 * rejected here rather than passed along.
 *
 * @param url - Candidate URL; may be any string.
 * @returns The parsed components, or `null` when `url` is not a parseable URL or
 *   not a recognised YouTube video URL.
 */
function parseYouTubeUrl(url: string): YouTubeUrl | null {
	const VIDEO_ID = /^[a-zA-Z0-9_-]{11}$/;

	let parsed: URL;
	try {
		parsed = new URL(url);
	} catch {
		// Not a URL at all
		return null;
	}

	const hostname = parsed.hostname.replace(/^www\./, "");

	// youtu.be/VIDEO_ID
	if (hostname === "youtu.be") {
		const videoId = parsed.pathname.slice(1).split("/")[0];
		return VIDEO_ID.test(videoId) ? { videoId } : null;
	}

	if (hostname !== "youtube.com" && hostname !== "m.youtube.com") return null;

	// youtube.com/watch?v=VIDEO_ID
	if (parsed.pathname === "/watch") {
		const videoId = parsed.searchParams.get("v");
		if (!videoId || !VIDEO_ID.test(videoId)) return null;
		const playlistId = parsed.searchParams.get("list") || undefined;
		return { videoId, playlistId };
	}

	// youtube.com/v/VIDEO_ID or youtube.com/embed/VIDEO_ID
	// Deliberately not anchored at the end: legacy embed URLs append parameters
	// directly after the ID (e.g. `/v/ID&hl=en`).
	const embedMatch = parsed.pathname.match(/^\/(?:v|embed)\/([a-zA-Z0-9_-]{11})/);
	if (embedMatch) return { videoId: embedMatch[1] };

	// youtube.com/shorts/VIDEO_ID or youtube.com/live/VIDEO_ID (exact path segment)
	const segmentMatch = parsed.pathname.match(/^\/(?:shorts|live)\/([^/]+)/);
	if (segmentMatch && VIDEO_ID.test(segmentMatch[1])) return { videoId: segmentMatch[1] };

	return null;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Clean VTT subtitle content to plain text.
 *
 * Drops the WEBVTT header, `Kind:` / `Language:` metadata, cue identifiers
 * (numeric or UUID-shaped), timing lines and blank lines; strips inline
 * timestamp and markup tags; decodes the WebVTT character escapes; removes
 * consecutive duplicate lines; and joins what remains into a single
 * whitespace-normalised string.
 *
 * @param vtt - Raw WebVTT file content, with LF or CRLF line endings.
 * @returns The caption text on one line, or an empty string when nothing remains.
 */
function cleanVttToText(vtt: string): string {
	// Escapes defined for WebVTT cue text. Directional marks carry no meaning in
	// plain text, so they are dropped.
	const entities: Record<string, string> = { amp: "&", lt: "<", gt: ">", nbsp: " ", lrm: "", rlm: "" };

	const textLines: string[] = [];
	let lastLine = "";

	// Split on LF or CRLF: a trailing "\r" would otherwise defeat the `$`-anchored
	// cue-identifier patterns below and leak identifiers into the text.
	for (const line of vtt.split(/\r?\n/)) {
		// Skip WEBVTT header, timestamps, and metadata
		if (
			line.startsWith("WEBVTT") ||
			line.startsWith("Kind:") ||
			line.startsWith("Language:") ||
			line.match(/^\d{2}:\d{2}/) || // Timestamp lines
			line.match(/^[a-f0-9-]{36}$/) || // UUID cue identifiers
			line.match(/^\d+$/) || // Numeric cue identifiers
			line.includes("-->") ||
			line.trim() === ""
		) {
			continue;
		}

		const cleaned = line
			// Remove inline timestamp tags like <00:00:01.520>
			.replace(/<\d{2}:\d{2}:\d{2}\.\d{3}>/g, "")
			// Remove other VTT tags like <c> </c>
			.replace(/<\/?[^>]+>/g, "")
			// Decode escapes in a single pass, after tag stripping, so that an escaped
			// "&lt;" is neither treated as a tag nor decoded twice
			.replace(/&(amp|lt|gt|nbsp|lrm|rlm);/g, (_match, name: string) => entities[name])
			.trim();

		// Skip duplicates (auto-generated captions often repeat)
		if (cleaned && cleaned !== lastLine) {
			textLines.push(cleaned);
			lastLine = cleaned;
		}
	}

	return textLines.join(" ").replace(/\s+/g, " ").trim();
}
|
|
112
|
+
|
|
113
|
+
/**
 * Render a duration given in seconds as `M:SS`, or `H:MM:SS` once it reaches an hour.
 *
 * Fractional seconds are floored. Minutes are zero-padded only when an hour
 * component is present; seconds are always zero-padded to two digits.
 *
 * @param seconds - Duration in seconds.
 * @returns The formatted duration string.
 */
function formatDuration(seconds: number): string {
	const twoDigits = (value: number): string => String(value).padStart(2, "0");

	const hours = Math.floor(seconds / 3600);
	const minutes = Math.floor((seconds % 3600) / 60);
	const secs = Math.floor(seconds % 60);

	const parts = hours > 0 ? [String(hours), twoDigits(minutes), twoDigits(secs)] : [String(minutes), twoDigits(secs)];
	return parts.join(":");
}
|
|
123
|
+
|
|
124
|
+
/**
 * Download one kind of English subtitle track for a video and return it as plain text.
 *
 * @param ytdlp - Path or name of the yt-dlp executable.
 * @param writeFlag - `--write-sub` for manual subtitles, `--write-auto-sub` for auto-generated captions.
 * @param outputBase - Output path prefix handed to yt-dlp via `-o`; the written `.vtt` file is located by globbing this prefix.
 * @param videoUrl - Canonical watch URL of the video.
 * @param timeoutMs - Timeout for the yt-dlp invocation, in milliseconds.
 * @returns Cleaned transcript text, or an empty string when yt-dlp failed, wrote no `.vtt` file, or the file held no text.
 */
async function downloadTranscript(
	ytdlp: string,
	writeFlag: "--write-sub" | "--write-auto-sub",
	outputBase: string,
	videoUrl: string,
	timeoutMs: number,
): Promise<string> {
	const result = exec(
		ytdlp,
		[
			writeFlag,
			"--sub-lang",
			"en,en-US,en-GB",
			"--sub-format",
			"vtt",
			"--skip-download",
			"--no-warnings",
			"--no-playlist",
			"-o",
			outputBase,
			videoUrl,
		],
		{ timeout: timeoutMs },
	);
	if (!result.ok) return "";

	// The subtitle file name is not known in advance, so find it by prefix
	const subFiles = await Array.fromAsync(new Bun.Glob(`${outputBase}*.vtt`).scan({ absolute: true }));
	if (subFiles.length === 0) return "";

	return cleanVttToText(await Bun.file(subFiles[0]).text());
}

/**
 * Format a view count compactly: `1.5B`, `100.0M`, `10.5K`, or the plain number below 1000.
 */
function formatViewCount(count: number): string {
	if (count >= 1_000_000_000) return `${(count / 1_000_000_000).toFixed(1)}B`;
	if (count >= 1_000_000) return `${(count / 1_000_000).toFixed(1)}M`;
	if (count >= 1_000) return `${(count / 1_000).toFixed(1)}K`;
	return String(count);
}

/**
 * Handle YouTube URLs - fetch metadata and transcript.
 *
 * Uses yt-dlp to read video metadata and to download English subtitles (manual
 * subtitles first, auto-generated captions as a fallback), then renders the
 * result as markdown.
 *
 * @param url - URL requested by the caller.
 * @param timeout - Timeout in seconds, applied to each yt-dlp invocation.
 * @returns `null` when `url` is not a recognised YouTube video URL. Otherwise a
 *   result with method `"youtube-no-ytdlp"` when yt-dlp is unavailable, or method
 *   `"youtube"` with markdown content and `finalUrl` set to the canonical watch URL.
 */
export const handleYouTube: SpecialHandler = async (url: string, timeout: number): Promise<RenderResult | null> => {
	const yt = parseYouTubeUrl(url);
	if (!yt) return null;

	// Ensure yt-dlp is available (auto-download if missing)
	const ytdlp = await ensureTool("yt-dlp", true);
	if (!ytdlp) {
		return {
			url,
			finalUrl: url,
			contentType: "text/plain",
			method: "youtube-no-ytdlp",
			content: "YouTube video detected but yt-dlp could not be installed.",
			fetchedAt: new Date().toISOString(),
			truncated: false,
			notes: ["yt-dlp installation failed"],
		};
	}

	const fetchedAt = new Date().toISOString();
	const notes: string[] = [];
	const videoUrl = `https://www.youtube.com/watch?v=${yt.videoId}`;
	const timeoutMs = timeout * 1000;

	// Fetch video metadata
	const metaResult = exec(ytdlp, ["--dump-json", "--no-warnings", "--no-playlist", "--skip-download", videoUrl], {
		timeout: timeoutMs,
	});

	let title = "YouTube Video";
	let channel = "";
	let description = "";
	let duration = 0;
	let uploadDate = "";
	let viewCount = 0;

	if (metaResult.ok && metaResult.stdout.trim()) {
		try {
			const meta = JSON.parse(metaResult.stdout) as {
				title?: string;
				channel?: string;
				uploader?: string;
				description?: string;
				duration?: number;
				upload_date?: string;
				view_count?: number;
			};
			title = meta.title || title;
			channel = meta.channel || meta.uploader || "";
			description = meta.description || "";
			duration = meta.duration || 0;
			uploadDate = meta.upload_date || "";
			viewCount = meta.view_count || 0;
		} catch {
			// Best effort: unparseable metadata leaves the defaults above in place
		}
	}

	// Format upload date (an 8-character YYYYMMDD value becomes YYYY-MM-DD)
	let formattedDate = "";
	if (uploadDate && uploadDate.length === 8) {
		formattedDate = `${uploadDate.slice(0, 4)}-${uploadDate.slice(4, 6)}-${uploadDate.slice(6, 8)}`;
	}

	// First, list available subtitles
	const listResult = exec(ytdlp, ["--list-subs", "--no-warnings", "--no-playlist", "--skip-download", videoUrl], {
		timeout: timeoutMs,
	});

	const hasManualSubs = listResult.stdout.includes("[info] Available subtitles");
	const hasAutoSubs = listResult.stdout.includes("[info] Available automatic captions");

	// Path prefix (inside the OS temp directory) for downloaded subtitle files
	const tmpBase = path.join(tmpdir(), `yt-${yt.videoId}-${Date.now()}`);

	let transcript = "";
	let transcriptSource = "";

	try {
		// Try manual subtitles first (English preferred). Each attempt gets its own
		// prefix so the fallback can never pick up a file left by the first attempt.
		if (hasManualSubs) {
			transcript = await downloadTranscript(ytdlp, "--write-sub", `${tmpBase}-manual`, videoUrl, timeoutMs);
			// Record the source only when text was actually obtained, so the notes
			// never claim a source and also report that no captions were available
			if (transcript) {
				transcriptSource = "manual";
				notes.push("Using manual subtitles");
			}
		}

		// Fall back to auto-generated captions
		if (!transcript && hasAutoSubs) {
			transcript = await downloadTranscript(ytdlp, "--write-auto-sub", `${tmpBase}-auto`, videoUrl, timeoutMs);
			if (transcript) {
				transcriptSource = "auto-generated";
				notes.push("Using auto-generated captions");
			}
		}
	} finally {
		// Cleanup temp files using sync unlink to avoid leaving handles open.
		// Best effort: a failure to list or remove a file must not fail the request.
		try {
			const tmpFiles = await Array.fromAsync(new Bun.Glob(`${tmpBase}*`).scan({ absolute: true }));
			for (const f of tmpFiles) {
				try {
					unlinkSync(f);
				} catch {}
			}
		} catch {}
	}

	// Build markdown output
	let md = `# ${title}\n\n`;
	if (channel) md += `**Channel:** ${channel}\n`;
	if (formattedDate) md += `**Uploaded:** ${formattedDate}\n`;
	if (duration > 0) md += `**Duration:** ${formatDuration(duration)}\n`;
	if (viewCount > 0) md += `**Views:** ${formatViewCount(viewCount)}\n`;
	md += `**Video ID:** ${yt.videoId}\n\n`;

	if (description) {
		// Truncate long descriptions
		const descPreview = description.length > 1000 ? `${description.slice(0, 1000)}...` : description;
		md += `---\n\n## Description\n\n${descPreview}\n\n`;
	}

	if (transcript) {
		md += `---\n\n## Transcript (${transcriptSource})\n\n${transcript}\n`;
	} else {
		notes.push("No subtitles/captions available");
		md += `---\n\n*No transcript available for this video.*\n`;
	}

	const output = finalizeOutput(md);
	return {
		url,
		finalUrl: videoUrl,
		contentType: "text/markdown",
		method: "youtube",
		content: output.content,
		fetchedAt,
		truncated: output.truncated,
		notes,
	};
};
|