getraw 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/.gitattributes +4 -0
  2. package/CLAUDE.md +57 -0
  3. package/README.md +166 -0
  4. package/RESEARCH.md +109 -0
  5. package/STATUS.md +23 -0
  6. package/bun.lock +50 -0
  7. package/bunfig.toml +3 -0
  8. package/docs/plugin-guide.md +166 -0
  9. package/docs/supported-sites.md +41 -0
  10. package/package.json +30 -0
  11. package/src/cli/index.ts +52 -0
  12. package/src/cli/options.ts +97 -0
  13. package/src/core/format-sorter.ts +208 -0
  14. package/src/core/logger.ts +101 -0
  15. package/src/core/orchestrator.ts +140 -0
  16. package/src/core/output-template.ts +58 -0
  17. package/src/core/types.ts +237 -0
  18. package/src/downloaders/base.ts +25 -0
  19. package/src/downloaders/dash.ts +287 -0
  20. package/src/downloaders/fragment.ts +226 -0
  21. package/src/downloaders/hls.ts +170 -0
  22. package/src/downloaders/http.ts +260 -0
  23. package/src/extractors/archive-org.ts +126 -0
  24. package/src/extractors/bandcamp.ts +130 -0
  25. package/src/extractors/base.ts +29 -0
  26. package/src/extractors/bilibili/bangumi.ts +205 -0
  27. package/src/extractors/bilibili/index.ts +233 -0
  28. package/src/extractors/bilibili/wbi.ts +60 -0
  29. package/src/extractors/coub.ts +137 -0
  30. package/src/extractors/dailymotion.ts +99 -0
  31. package/src/extractors/dropbox.ts +52 -0
  32. package/src/extractors/generic.ts +118 -0
  33. package/src/extractors/google-drive.ts +106 -0
  34. package/src/extractors/imgur.ts +156 -0
  35. package/src/extractors/instagram/index.ts +263 -0
  36. package/src/extractors/instagram/reels.ts +166 -0
  37. package/src/extractors/kick/clips.ts +91 -0
  38. package/src/extractors/kick/index.ts +118 -0
  39. package/src/extractors/kick/live.ts +89 -0
  40. package/src/extractors/niconico/index.ts +209 -0
  41. package/src/extractors/odysee.ts +126 -0
  42. package/src/extractors/peertube.ts +143 -0
  43. package/src/extractors/reddit/gallery.ts +124 -0
  44. package/src/extractors/reddit/index.ts +203 -0
  45. package/src/extractors/rumble.ts +127 -0
  46. package/src/extractors/soundcloud/index.ts +161 -0
  47. package/src/extractors/soundcloud/playlist.ts +129 -0
  48. package/src/extractors/spotify.ts +97 -0
  49. package/src/extractors/streamable.ts +121 -0
  50. package/src/extractors/ted.ts +151 -0
  51. package/src/extractors/tiktok/index.ts +207 -0
  52. package/src/extractors/tiktok/user.ts +176 -0
  53. package/src/extractors/twitch/clips.ts +125 -0
  54. package/src/extractors/twitch/index.ts +136 -0
  55. package/src/extractors/twitch/live.ts +132 -0
  56. package/src/extractors/twitter/index.ts +140 -0
  57. package/src/extractors/twitter/spaces.ts +200 -0
  58. package/src/extractors/vimeo/index.ts +187 -0
  59. package/src/extractors/youtube/captions.ts +111 -0
  60. package/src/extractors/youtube/index.ts +252 -0
  61. package/src/extractors/youtube/innertube.ts +364 -0
  62. package/src/extractors/youtube/nsig.ts +105 -0
  63. package/src/extractors/youtube/playlist.ts +227 -0
  64. package/src/extractors/youtube/signature.ts +163 -0
  65. package/src/networking/client.ts +311 -0
  66. package/src/networking/cookies.ts +138 -0
  67. package/src/networking/proxy.ts +132 -0
  68. package/src/networking/tls.ts +67 -0
  69. package/src/networking/user-agents.ts +88 -0
  70. package/src/postprocessors/base.ts +44 -0
  71. package/src/postprocessors/extract-audio.ts +98 -0
  72. package/src/postprocessors/ffmpeg.ts +146 -0
  73. package/src/postprocessors/merge.ts +102 -0
  74. package/src/postprocessors/metadata.ts +73 -0
  75. package/src/postprocessors/sponsorblock.ts +162 -0
  76. package/src/postprocessors/subtitles.ts +285 -0
  77. package/src/postprocessors/thumbnails.ts +194 -0
  78. package/src/utils/sanitize.ts +36 -0
  79. package/src/utils/traverse.ts +68 -0
  80. package/tests/core/format-sorter.test.ts +96 -0
  81. package/tests/core/output-template.test.ts +56 -0
  82. package/tests/core/types.test.ts +79 -0
  83. package/tests/unit/downloaders/dash.test.ts +57 -0
  84. package/tests/unit/downloaders/hls.test.ts +120 -0
  85. package/tests/unit/downloaders/http.test.ts +114 -0
  86. package/tests/unit/extractors/bilibili.test.ts +83 -0
  87. package/tests/unit/extractors/instagram.test.ts +273 -0
  88. package/tests/unit/extractors/kick.test.ts +85 -0
  89. package/tests/unit/extractors/misc.test.ts +942 -0
  90. package/tests/unit/extractors/niconico.test.ts +61 -0
  91. package/tests/unit/extractors/reddit.test.ts +222 -0
  92. package/tests/unit/extractors/soundcloud.test.ts +299 -0
  93. package/tests/unit/extractors/tiktok.test.ts +260 -0
  94. package/tests/unit/extractors/twitch.test.ts +250 -0
  95. package/tests/unit/extractors/twitter.test.ts +181 -0
  96. package/tests/unit/extractors/vimeo.test.ts +253 -0
  97. package/tests/unit/extractors/youtube.test.ts +259 -0
  98. package/tests/unit/networking/client.test.ts +272 -0
  99. package/tests/unit/networking/cookies.test.ts +256 -0
  100. package/tests/unit/networking/proxy.test.ts +137 -0
  101. package/tests/unit/postprocessors/extract-audio.test.ts +63 -0
  102. package/tests/unit/postprocessors/merge.test.ts +61 -0
  103. package/tests/unit/postprocessors/subtitles.test.ts +89 -0
  104. package/tools/dashboard.ts +112 -0
  105. package/tsconfig.json +17 -0
@@ -0,0 +1,942 @@
1
+ import { describe, expect, test, mock } from "bun:test";
2
+ import { DailymotionExtractor } from "../../../src/extractors/dailymotion";
3
+ import { RumbleExtractor } from "../../../src/extractors/rumble";
4
+ import { BandcampExtractor } from "../../../src/extractors/bandcamp";
5
+ import { SpotifyExtractor } from "../../../src/extractors/spotify";
6
+ import { PeerTubeExtractor } from "../../../src/extractors/peertube";
7
+ import { OdyseeExtractor } from "../../../src/extractors/odysee";
8
+ import { StreamableExtractor } from "../../../src/extractors/streamable";
9
+ import { ImgurExtractor } from "../../../src/extractors/imgur";
10
+ import { CoubExtractor } from "../../../src/extractors/coub";
11
+ import { TEDExtractor } from "../../../src/extractors/ted";
12
+ import { ArchiveOrgExtractor } from "../../../src/extractors/archive-org";
13
+ import { DropboxExtractor } from "../../../src/extractors/dropbox";
14
+ import { GoogleDriveExtractor } from "../../../src/extractors/google-drive";
15
+
16
+ describe("DailymotionExtractor URL matching", () => {
17
+ const extractor = new DailymotionExtractor();
18
+
19
+ const validUrls = [
20
+ "https://www.dailymotion.com/video/x7xvpcd",
21
+ "https://www.dailymotion.com/video/x7xvpcd_some-title",
22
+ "http://www.dailymotion.com/video/abc123",
23
+ ];
24
+
25
+ const invalidUrls = [
26
+ "https://www.youtube.com/watch?v=abc",
27
+ "https://dailymotion.com/",
28
+ "https://www.dailymotion.com/",
29
+ "https://vimeo.com/123",
30
+ ];
31
+
32
+ for (const url of validUrls) {
33
+ test(`matches: ${url}`, () => {
34
+ expect(extractor.canHandle(url)).toBe(true);
35
+ });
36
+ }
37
+
38
+ for (const url of invalidUrls) {
39
+ test(`rejects: ${url}`, () => {
40
+ expect(extractor.canHandle(url)).toBe(false);
41
+ });
42
+ }
43
+
44
+ test("has correct extractor name", () => {
45
+ expect(extractor._NAME).toBe("dailymotion");
46
+ });
47
+ });
48
+
49
+ describe("DailymotionExtractor parsing", () => {
50
+ test("parses metadata and formats from API response", async () => {
51
+ const extractor = new DailymotionExtractor();
52
+
53
+ const mockApiResponse = {
54
+ id: "x7xvpcd",
55
+ title: "Test Video",
56
+ description: "A test video",
57
+ duration: 120,
58
+ owner: { screenname: "TestUser", id: "usr123" },
59
+ created_time: 1700000000,
60
+ views_total: 5000,
61
+ likes_total: 200,
62
+ thumbnail_url: "https://s1.dmcdn.net/thumb.jpg",
63
+ qualities: {
64
+ "720": [{ type: "video/mp4", url: "https://cdn.dmcdn.net/720p.mp4" }],
65
+ "480": [{ type: "video/mp4", url: "https://cdn.dmcdn.net/480p.mp4" }],
66
+ auto: [{ type: "application/x-mpegURL", url: "https://cdn.dmcdn.net/master.m3u8" }],
67
+ },
68
+ };
69
+
70
+ global.fetch = mock(() =>
71
+ Promise.resolve(new Response(JSON.stringify(mockApiResponse), { status: 200 }))
72
+ ) as typeof fetch;
73
+
74
+ const info = await extractor.extract("https://www.dailymotion.com/video/x7xvpcd");
75
+
76
+ expect(info.id).toBe("x7xvpcd");
77
+ expect(info.title).toBe("Test Video");
78
+ expect(info.duration).toBe(120);
79
+ expect(info.uploader).toBe("TestUser");
80
+ expect(info.view_count).toBe(5000);
81
+ expect(info.formats).toBeDefined();
82
+ expect(info.formats!.length).toBeGreaterThan(0);
83
+ expect(info.formats!.some((f) => f.ext === "mp4")).toBe(true);
84
+ expect(info.thumbnails).toBeDefined();
85
+ expect(info.thumbnails![0].url).toBe("https://s1.dmcdn.net/thumb.jpg");
86
+ });
87
+ });
88
+
89
+ describe("RumbleExtractor URL matching", () => {
90
+ const extractor = new RumbleExtractor();
91
+
92
+ const validUrls = [
93
+ "https://rumble.com/vabc123-some-title.html",
94
+ "https://www.rumble.com/vabc123-test.html",
95
+ "https://rumble.com/embed/vabc123",
96
+ ];
97
+
98
+ const invalidUrls = [
99
+ "https://www.youtube.com/watch?v=abc",
100
+ "https://rumble.com/",
101
+ "https://rumble.com/user/",
102
+ ];
103
+
104
+ for (const url of validUrls) {
105
+ test(`matches: ${url}`, () => {
106
+ expect(extractor.canHandle(url)).toBe(true);
107
+ });
108
+ }
109
+
110
+ for (const url of invalidUrls) {
111
+ test(`rejects: ${url}`, () => {
112
+ expect(extractor.canHandle(url)).toBe(false);
113
+ });
114
+ }
115
+
116
+ test("has correct extractor name", () => {
117
+ expect(extractor._NAME).toBe("rumble");
118
+ });
119
+ });
120
+
121
+ describe("BandcampExtractor URL matching", () => {
122
+ const extractor = new BandcampExtractor();
123
+
124
+ const validUrls = [
125
+ "https://artist.bandcamp.com/track/some-track",
126
+ "https://artist.bandcamp.com/album/some-album",
127
+ "https://someartist.bandcamp.com/track/my-song",
128
+ ];
129
+
130
+ const invalidUrls = [
131
+ "https://www.youtube.com/watch?v=abc",
132
+ "https://bandcamp.com/",
133
+ "https://artist.bandcamp.com/",
134
+ ];
135
+
136
+ for (const url of validUrls) {
137
+ test(`matches: ${url}`, () => {
138
+ expect(extractor.canHandle(url)).toBe(true);
139
+ });
140
+ }
141
+
142
+ for (const url of invalidUrls) {
143
+ test(`rejects: ${url}`, () => {
144
+ expect(extractor.canHandle(url)).toBe(false);
145
+ });
146
+ }
147
+
148
+ test("has correct extractor name", () => {
149
+ expect(extractor._NAME).toBe("bandcamp");
150
+ });
151
+ });
152
+
153
+ describe("BandcampExtractor parsing", () => {
154
+ test("parses single track from data-tralbum", async () => {
155
+ const extractor = new BandcampExtractor();
156
+
157
+ const tralbumData = {
158
+ current: { id: 12345, title: "My Track", type: "track" },
159
+ artist: "Test Artist",
160
+ art_id: 999,
161
+ trackinfo: [
162
+ {
163
+ id: 12345,
164
+ title: "My Track",
165
+ duration: 180,
166
+ has_audio: true,
167
+ track_num: 1,
168
+ file: { "mp3-128": "https://t4.bcbits.com/stream/abc123" },
169
+ },
170
+ ],
171
+ };
172
+
173
+ const htmlContent = `<html><head><title>My Track by Test Artist</title></head><body>
174
+ <div data-tralbum="${JSON.stringify(tralbumData).replace(/"/g, "&quot;")}"></div>
175
+ </body></html>`;
176
+
177
+ global.fetch = mock(() =>
178
+ Promise.resolve(new Response(htmlContent, { status: 200 }))
179
+ ) as typeof fetch;
180
+
181
+ const info = await extractor.extract("https://artist.bandcamp.com/track/my-track");
182
+
183
+ expect(info.id).toBe("12345");
184
+ expect(info.title).toBe("My Track");
185
+ expect(info.uploader).toBe("Test Artist");
186
+ expect(info.duration).toBe(180);
187
+ expect(info.formats).toBeDefined();
188
+ expect(info.formats!.length).toBeGreaterThan(0);
189
+ expect(info.formats![0].ext).toBe("mp3");
190
+ });
191
+
192
+ test("parses album as playlist", async () => {
193
+ const extractor = new BandcampExtractor();
194
+
195
+ const tralbumData = {
196
+ current: { id: 99999, title: "My Album", type: "album" },
197
+ artist: "Test Artist",
198
+ trackinfo: [
199
+ {
200
+ id: 1,
201
+ title: "Track One",
202
+ duration: 120,
203
+ has_audio: true,
204
+ track_num: 1,
205
+ file: { "mp3-128": "https://t4.bcbits.com/stream/track1" },
206
+ },
207
+ {
208
+ id: 2,
209
+ title: "Track Two",
210
+ duration: 200,
211
+ has_audio: true,
212
+ track_num: 2,
213
+ file: { "mp3-128": "https://t4.bcbits.com/stream/track2" },
214
+ },
215
+ ],
216
+ };
217
+
218
+ const htmlContent = `<html><body>
219
+ <div data-tralbum="${JSON.stringify(tralbumData).replace(/"/g, "&quot;")}"></div>
220
+ </body></html>`;
221
+
222
+ global.fetch = mock(() =>
223
+ Promise.resolve(new Response(htmlContent, { status: 200 }))
224
+ ) as typeof fetch;
225
+
226
+ const info = await extractor.extract("https://artist.bandcamp.com/album/my-album");
227
+
228
+ expect(info._type).toBe("playlist");
229
+ expect(info.entries).toHaveLength(2);
230
+ expect(info.playlist_count).toBe(2);
231
+ expect(info.entries![0].title).toBe("Track One");
232
+ expect(info.entries![1].title).toBe("Track Two");
233
+ });
234
+ });
235
+
236
+ describe("SpotifyExtractor URL matching", () => {
237
+ const extractor = new SpotifyExtractor();
238
+
239
+ const validUrls = [
240
+ "https://open.spotify.com/episode/5678abc",
241
+ "https://open.spotify.com/episode/1234567890abcdef",
242
+ ];
243
+
244
+ const invalidUrls = [
245
+ "https://open.spotify.com/track/abc",
246
+ "https://open.spotify.com/album/abc",
247
+ "https://open.spotify.com/",
248
+ "https://www.youtube.com/watch?v=abc",
249
+ ];
250
+
251
+ for (const url of validUrls) {
252
+ test(`matches: ${url}`, () => {
253
+ expect(extractor.canHandle(url)).toBe(true);
254
+ });
255
+ }
256
+
257
+ for (const url of invalidUrls) {
258
+ test(`rejects: ${url}`, () => {
259
+ expect(extractor.canHandle(url)).toBe(false);
260
+ });
261
+ }
262
+
263
+ test("has correct extractor name", () => {
264
+ expect(extractor._NAME).toBe("spotify");
265
+ });
266
+ });
267
+
268
+ describe("SpotifyExtractor parsing", () => {
269
+ test("extracts audio preview from page data", async () => {
270
+ const extractor = new SpotifyExtractor();
271
+
272
+ const nextData = {
273
+ props: {
274
+ pageProps: {
275
+ episode: {
276
+ name: "Test Episode",
277
+ description: "A podcast episode",
278
+ duration_ms: 3600000,
279
+ audio_preview_url: "https://p.scdn.co/mp3-preview/abc123",
280
+ show: { name: "Test Podcast" },
281
+ images: [{ url: "https://i.scdn.co/image/thumb.jpg", width: 300, height: 300 }],
282
+ },
283
+ },
284
+ },
285
+ };
286
+
287
+ const html = `<html><head></head><body>
288
+ <script id="__NEXT_DATA__" type="application/json">${JSON.stringify(nextData)}</script>
289
+ </body></html>`;
290
+
291
+ global.fetch = mock(() =>
292
+ Promise.resolve(new Response(html, { status: 200 }))
293
+ ) as typeof fetch;
294
+
295
+ const info = await extractor.extract("https://open.spotify.com/episode/5678abc");
296
+
297
+ expect(info.id).toBe("5678abc");
298
+ expect(info.title).toBe("Test Episode");
299
+ expect(info.uploader).toBe("Test Podcast");
300
+ expect(info.duration).toBe(3600);
301
+ expect(info.url).toBe("https://p.scdn.co/mp3-preview/abc123");
302
+ expect(info.formats![0].format_note).toContain("preview");
303
+ });
304
+ });
305
+
306
+ describe("PeerTubeExtractor URL matching", () => {
307
+ const extractor = new PeerTubeExtractor();
308
+
309
+ const validUrls = [
310
+ "https://peertube.social/videos/watch/abc123-def456",
311
+ "https://video.ploud.fr/videos/watch/abc123",
312
+ "https://peertube.example.com/w/abc123",
313
+ "https://instance.tld/videos/embed/abc123",
314
+ ];
315
+
316
+ const invalidUrls = [
317
+ "https://www.youtube.com/watch?v=abc",
318
+ "https://peertube.social/",
319
+ "https://peertube.social/api/v1/videos",
320
+ ];
321
+
322
+ for (const url of validUrls) {
323
+ test(`matches: ${url}`, () => {
324
+ expect(extractor.canHandle(url)).toBe(true);
325
+ });
326
+ }
327
+
328
+ for (const url of invalidUrls) {
329
+ test(`rejects: ${url}`, () => {
330
+ expect(extractor.canHandle(url)).toBe(false);
331
+ });
332
+ }
333
+
334
+ test("has correct extractor name", () => {
335
+ expect(extractor._NAME).toBe("peertube");
336
+ });
337
+ });
338
+
339
+ describe("PeerTubeExtractor parsing", () => {
340
+ test("parses video files and HLS streams", async () => {
341
+ const extractor = new PeerTubeExtractor();
342
+
343
+ const apiResponse = {
344
+ uuid: "abc123-def456",
345
+ name: "PeerTube Video",
346
+ description: "A video on PeerTube",
347
+ duration: 300,
348
+ views: 1000,
349
+ likes: 50,
350
+ publishedAt: "2024-01-01T00:00:00.000Z",
351
+ thumbnailUrl: "/static/thumbnails/abc.jpg",
352
+ account: { displayName: "Test User", name: "testuser", url: "https://peertube.social/accounts/testuser" },
353
+ channel: { displayName: "Test Channel", name: "testchannel", url: "https://peertube.social/c/testchannel" },
354
+ files: [
355
+ {
356
+ fileUrl: "https://peertube.social/static/web-videos/abc-1080.mp4",
357
+ resolution: { id: 1080, label: "1080p" },
358
+ size: 100000000,
359
+ fps: 30,
360
+ },
361
+ {
362
+ fileUrl: "https://peertube.social/static/web-videos/abc-720.mp4",
363
+ resolution: { id: 720, label: "720p" },
364
+ size: 50000000,
365
+ fps: 30,
366
+ },
367
+ ],
368
+ streamingPlaylists: [
369
+ { playlistUrl: "https://peertube.social/static/streaming-playlists/hls/abc/master.m3u8", type: 1 },
370
+ ],
371
+ };
372
+
373
+ global.fetch = mock(() =>
374
+ Promise.resolve(new Response(JSON.stringify(apiResponse), { status: 200 }))
375
+ ) as typeof fetch;
376
+
377
+ const info = await extractor.extract("https://peertube.social/videos/watch/abc123-def456");
378
+
379
+ expect(info.id).toBe("abc123-def456");
380
+ expect(info.title).toBe("PeerTube Video");
381
+ expect(info.duration).toBe(300);
382
+ expect(info.uploader).toBe("Test User");
383
+ expect(info.formats).toBeDefined();
384
+ const mp4Formats = info.formats!.filter((f) => !f.protocol);
385
+ expect(mp4Formats.length).toBe(2);
386
+ expect(mp4Formats.some((f) => f.height === 1080)).toBe(true);
387
+ const hlsFormats = info.formats!.filter((f) => f.protocol === "m3u8");
388
+ expect(hlsFormats.length).toBe(1);
389
+ });
390
+ });
391
+
392
+ describe("OdyseeExtractor URL matching", () => {
393
+ const extractor = new OdyseeExtractor();
394
+
395
+ const validUrls = [
396
+ "https://odysee.com/@SomeChannel:a/video-title:b",
397
+ "https://www.odysee.com/@Channel:c/some-video:d",
398
+ "https://lbry.tv/@Channel:a/video:b",
399
+ ];
400
+
401
+ const invalidUrls = [
402
+ "https://www.youtube.com/watch?v=abc",
403
+ "https://odysee.com/",
404
+ "https://odysee.com/@channel",
405
+ ];
406
+
407
+ for (const url of validUrls) {
408
+ test(`matches: ${url}`, () => {
409
+ expect(extractor.canHandle(url)).toBe(true);
410
+ });
411
+ }
412
+
413
+ for (const url of invalidUrls) {
414
+ test(`rejects: ${url}`, () => {
415
+ expect(extractor.canHandle(url)).toBe(false);
416
+ });
417
+ }
418
+
419
+ test("has correct extractor name", () => {
420
+ expect(extractor._NAME).toBe("odysee");
421
+ });
422
+ });
423
+
424
+ describe("StreamableExtractor URL matching", () => {
425
+ const extractor = new StreamableExtractor();
426
+
427
+ const validUrls = [
428
+ "https://streamable.com/abc123",
429
+ "https://www.streamable.com/xyz789",
430
+ ];
431
+
432
+ const invalidUrls = [
433
+ "https://www.youtube.com/watch?v=abc",
434
+ "https://streamable.com/",
435
+ "https://notstreamable.com/abc",
436
+ ];
437
+
438
+ for (const url of validUrls) {
439
+ test(`matches: ${url}`, () => {
440
+ expect(extractor.canHandle(url)).toBe(true);
441
+ });
442
+ }
443
+
444
+ for (const url of invalidUrls) {
445
+ test(`rejects: ${url}`, () => {
446
+ expect(extractor.canHandle(url)).toBe(false);
447
+ });
448
+ }
449
+
450
+ test("has correct extractor name", () => {
451
+ expect(extractor._NAME).toBe("streamable");
452
+ });
453
+ });
454
+
455
+ describe("StreamableExtractor parsing", () => {
456
+ test("extracts video sources from __NEXT_DATA__", async () => {
457
+ const extractor = new StreamableExtractor();
458
+
459
+ const nextData = {
460
+ props: {
461
+ pageProps: {
462
+ video: {
463
+ title: "Streamable Test",
464
+ thumbnail_url: "//cdn.streamable.com/thumb.jpg",
465
+ duration: 15,
466
+ files: {
467
+ mp4: { url: "//cdn.streamable.com/video/mp4/abc123.mp4", width: 1920, height: 1080, bitrate: 5000 },
468
+ "mp4-mobile": { url: "//cdn.streamable.com/video/mp4-mobile/abc123.mp4", width: 720, height: 480 },
469
+ },
470
+ },
471
+ },
472
+ },
473
+ };
474
+
475
+ const html = `<html><head><title>Streamable Test</title></head><body>
476
+ <script id="__NEXT_DATA__" type="application/json">${JSON.stringify(nextData)}</script>
477
+ </body></html>`;
478
+
479
+ global.fetch = mock(() =>
480
+ Promise.resolve(new Response(html, { status: 200 }))
481
+ ) as typeof fetch;
482
+
483
+ const info = await extractor.extract("https://streamable.com/abc123");
484
+
485
+ expect(info.id).toBe("abc123");
486
+ expect(info.title).toBe("Streamable Test");
487
+ expect(info.formats).toBeDefined();
488
+ expect(info.formats!.some((f) => f.height === 1080)).toBe(true);
489
+ expect(info.formats!.every((f) => f.url.startsWith("https://"))).toBe(true);
490
+ });
491
+ });
492
+
493
+ describe("ImgurExtractor URL matching", () => {
494
+ const extractor = new ImgurExtractor();
495
+
496
+ const validUrls = [
497
+ "https://imgur.com/a/abc123",
498
+ "https://imgur.com/gallery/xyz789",
499
+ "https://imgur.com/abc123",
500
+ "https://i.imgur.com/abc123.gifv",
501
+ "https://i.imgur.com/abc123.mp4",
502
+ "https://i.imgur.com/abc123.gif",
503
+ ];
504
+
505
+ const invalidUrls = [
506
+ "https://www.youtube.com/watch?v=abc",
507
+ "https://imgur.com/",
508
+ "https://notimgur.com/abc",
509
+ ];
510
+
511
+ for (const url of validUrls) {
512
+ test(`matches: ${url}`, () => {
513
+ expect(extractor.canHandle(url)).toBe(true);
514
+ });
515
+ }
516
+
517
+ for (const url of invalidUrls) {
518
+ test(`rejects: ${url}`, () => {
519
+ expect(extractor.canHandle(url)).toBe(false);
520
+ });
521
+ }
522
+
523
+ test("has correct extractor name", () => {
524
+ expect(extractor._NAME).toBe("imgur");
525
+ });
526
+
527
+ test("converts .gifv URL to .mp4 directly", async () => {
528
+ const extractor2 = new ImgurExtractor();
529
+ const gifvUrl = "https://i.imgur.com/abc123.gifv";
530
+
531
+ const info = await extractor2.extract(gifvUrl);
532
+ expect(info.formats![0].url).toBe("https://i.imgur.com/abc123.mp4");
533
+ expect(info.formats![0].ext).toBe("mp4");
534
+ });
535
+ });
536
+
537
+ describe("CoubExtractor URL matching", () => {
538
+ const extractor = new CoubExtractor();
539
+
540
+ const validUrls = [
541
+ "https://coub.com/view/abc123",
542
+ "https://www.coub.com/view/xyz_789",
543
+ "https://coub.com/embed/abc123",
544
+ ];
545
+
546
+ const invalidUrls = [
547
+ "https://www.youtube.com/watch?v=abc",
548
+ "https://coub.com/",
549
+ "https://notcoub.com/view/abc",
550
+ ];
551
+
552
+ for (const url of validUrls) {
553
+ test(`matches: ${url}`, () => {
554
+ expect(extractor.canHandle(url)).toBe(true);
555
+ });
556
+ }
557
+
558
+ for (const url of invalidUrls) {
559
+ test(`rejects: ${url}`, () => {
560
+ expect(extractor.canHandle(url)).toBe(false);
561
+ });
562
+ }
563
+
564
+ test("has correct extractor name", () => {
565
+ expect(extractor._NAME).toBe("coub");
566
+ });
567
+ });
568
+
569
+ describe("CoubExtractor parsing", () => {
570
+ test("parses video and audio as separate formats", async () => {
571
+ const extractor = new CoubExtractor();
572
+
573
+ const apiResponse = {
574
+ id: 12345,
575
+ title: "Funny Coub",
576
+ duration: 10.5,
577
+ views_count: 10000,
578
+ likes_count: 500,
579
+ created_at: "2024-01-15T10:00:00.000Z",
580
+ channel: { title: "CoubCreator", permalink: "coubcreator" },
581
+ file_versions: {
582
+ html5: {
583
+ video: {
584
+ high: { url: "https://coubstorage.com/get/coubs/high.mp4", size: 5000000 },
585
+ med: { url: "https://coubstorage.com/get/coubs/med.mp4", size: 2000000 },
586
+ },
587
+ audio: {
588
+ high: { url: "https://coubstorage.com/get/coubs/audio.mp4", size: 1000000 },
589
+ },
590
+ },
591
+ },
592
+ image_versions: {
593
+ template: "https://coubstorage.com/img/%{version}.jpg",
594
+ versions: ["big"],
595
+ },
596
+ };
597
+
598
+ global.fetch = mock(() =>
599
+ Promise.resolve(new Response(JSON.stringify(apiResponse), { status: 200 }))
600
+ ) as typeof fetch;
601
+
602
+ const info = await extractor.extract("https://coub.com/view/abc123");
603
+
604
+ expect(info.id).toBe("12345");
605
+ expect(info.title).toBe("Funny Coub");
606
+ expect(info.uploader).toBe("CoubCreator");
607
+ expect(info.formats).toBeDefined();
608
+
609
+ const videoFormats = info.formats!.filter((f) => f.vcodec !== "none" && f.acodec === "none");
610
+ expect(videoFormats.length).toBeGreaterThan(0);
611
+
612
+ const audioFormats = info.formats!.filter((f) => f.vcodec === "none");
613
+ expect(audioFormats.length).toBeGreaterThan(0);
614
+
615
+ expect(info.formats!.some((f) => f.format_note?.includes("merge"))).toBe(true);
616
+ });
617
+ });
618
+
619
+ describe("TEDExtractor URL matching", () => {
620
+ const extractor = new TEDExtractor();
621
+
622
+ const validUrls = [
623
+ "https://www.ted.com/talks/some_speaker_talk_title",
624
+ "https://ted.com/talks/another_great_talk",
625
+ ];
626
+
627
+ const invalidUrls = [
628
+ "https://www.youtube.com/watch?v=abc",
629
+ "https://ted.com/",
630
+ "https://ted.com/playlists/abc",
631
+ "https://notted.com/talks/abc",
632
+ ];
633
+
634
+ for (const url of validUrls) {
635
+ test(`matches: ${url}`, () => {
636
+ expect(extractor.canHandle(url)).toBe(true);
637
+ });
638
+ }
639
+
640
+ for (const url of invalidUrls) {
641
+ test(`rejects: ${url}`, () => {
642
+ expect(extractor.canHandle(url)).toBe(false);
643
+ });
644
+ }
645
+
646
+ test("has correct extractor name", () => {
647
+ expect(extractor._NAME).toBe("ted");
648
+ });
649
+ });
650
+
651
+ describe("TEDExtractor parsing", () => {
652
+ test("parses talk data with video formats and subtitles", async () => {
653
+ const extractor = new TEDExtractor();
654
+
655
+ const playerData = {
656
+ resources: {
657
+ h264: [
658
+ { bitrate: 2500, file: "https://download.ted.com/talks/talk-1080p.mp4", height: 1080, width: 1920 },
659
+ { bitrate: 1200, file: "https://download.ted.com/talks/talk-720p.mp4", height: 720, width: 1280 },
660
+ ],
661
+ hls: { stream: "https://hls.ted.com/talks/talk.m3u8" },
662
+ },
663
+ duration: 900,
664
+ thumb: "https://pe.tedcdn.com/thumb.jpg",
665
+ };
666
+
667
+ const nextData = {
668
+ props: {
669
+ pageProps: {
670
+ talkData: {
671
+ id: 9876,
672
+ slug: "awesome_talk",
673
+ title: "An Amazing Talk",
674
+ description: "A fascinating presentation.",
675
+ duration: 900,
676
+ viewedCount: 500000,
677
+ publishedAt: "2024-03-15T00:00:00.000Z",
678
+ speakers: [{ firstname: "Jane", lastname: "Doe" }],
679
+ playerData: JSON.stringify(playerData),
680
+ subtitledDownloads: {
681
+ en: { high: "https://download.ted.com/talks/en.srt", name: "English" },
682
+ es: { high: "https://download.ted.com/talks/es.srt", name: "Spanish" },
683
+ },
684
+ },
685
+ },
686
+ },
687
+ };
688
+
689
+ const html = `<html><head><title>An Amazing Talk | TED Talk</title></head><body>
690
+ <script id="__NEXT_DATA__" type="application/json">${JSON.stringify(nextData)}</script>
691
+ </body></html>`;
692
+
693
+ global.fetch = mock(() =>
694
+ Promise.resolve(new Response(html, { status: 200 }))
695
+ ) as typeof fetch;
696
+
697
+ const info = await extractor.extract("https://www.ted.com/talks/awesome_talk");
698
+
699
+ expect(info.id).toBe("9876");
700
+ expect(info.title).toBe("An Amazing Talk");
701
+ expect(info.uploader).toBe("Jane Doe");
702
+ expect(info.duration).toBe(900);
703
+ expect(info.formats).toBeDefined();
704
+ expect(info.formats!.some((f) => f.height === 1080)).toBe(true);
705
+ expect(info.formats!.some((f) => f.protocol === "m3u8")).toBe(true);
706
+ expect(info.subtitles).toBeDefined();
707
+ expect(Object.keys(info.subtitles!)).toContain("en");
708
+ expect(Object.keys(info.subtitles!)).toContain("es");
709
+ });
710
+ });
711
+
712
+ describe("ArchiveOrgExtractor URL matching", () => {
713
+ const extractor = new ArchiveOrgExtractor();
714
+
715
+ const validUrls = [
716
+ "https://archive.org/details/BigBuckBunny_124",
717
+ "https://www.archive.org/details/some-video",
718
+ "https://archive.org/download/some-item",
719
+ ];
720
+
721
+ const invalidUrls = [
722
+ "https://www.youtube.com/watch?v=abc",
723
+ "https://archive.org/",
724
+ "https://archive.org/search",
725
+ "https://notarchive.org/details/abc",
726
+ ];
727
+
728
+ for (const url of validUrls) {
729
+ test(`matches: ${url}`, () => {
730
+ expect(extractor.canHandle(url)).toBe(true);
731
+ });
732
+ }
733
+
734
+ for (const url of invalidUrls) {
735
+ test(`rejects: ${url}`, () => {
736
+ expect(extractor.canHandle(url)).toBe(false);
737
+ });
738
+ }
739
+
740
+ test("has correct extractor name", () => {
741
+ expect(extractor._NAME).toBe("archive.org");
742
+ });
743
+ });
744
+
745
+ describe("ArchiveOrgExtractor parsing", () => {
746
+ test("parses video and audio files from metadata API", async () => {
747
+ const extractor = new ArchiveOrgExtractor();
748
+
749
+ const apiResponse = {
750
+ metadata: {
751
+ identifier: "BigBuckBunny_124",
752
+ title: "Big Buck Bunny",
753
+ creator: "Blender Foundation",
754
+ date: "2008-04-10",
755
+ description: "Big Buck Bunny short film",
756
+ },
757
+ server: "ia800100.us.archive.org",
758
+ dir: "/0/items/BigBuckBunny_124",
759
+ files: [
760
+ { name: "BigBuckBunny.mp4", format: "h.264", size: "276134947", height: "1080", width: "1920", source: "original" },
761
+ { name: "BigBuckBunny.ogv", format: "Ogg Video", size: "200000000", source: "derivative" },
762
+ { name: "BigBuckBunny_thumb.jpg", format: "Thumbnail", size: "5000" },
763
+ ],
764
+ };
765
+
766
+ global.fetch = mock(() =>
767
+ Promise.resolve(new Response(JSON.stringify(apiResponse), { status: 200 }))
768
+ ) as typeof fetch;
769
+
770
+ const info = await extractor.extract("https://archive.org/details/BigBuckBunny_124");
771
+
772
+ expect(info.id).toBe("BigBuckBunny_124");
773
+ expect(info.title).toBe("Big Buck Bunny");
774
+ expect(info.uploader).toBe("Blender Foundation");
775
+ expect(info.formats).toBeDefined();
776
+ expect(info.formats!.some((f) => f.format_id === "BigBuckBunny.mp4")).toBe(true);
777
+ expect(info.formats!.some((f) => f.quality === 2)).toBe(true);
778
+ expect(info.formats![0].url).toContain("ia800100.us.archive.org");
779
+ });
780
+ });
781
+
782
/**
 * URL-recognition suite for DropboxExtractor.
 *
 * Share links under `/s/` and `/sh/` (with or without the `www.` prefix)
 * are expected to be claimed by the extractor. Unrelated sites, Dropbox
 * pages that are not share links, and a look-alike host are expected to
 * be refused.
 */
describe("DropboxExtractor URL matching", () => {
  const extractor = new DropboxExtractor();

  // Builds a test body asserting that canHandle() answers `expected` for `candidate`.
  const handles = (candidate: string, expected: boolean) => () => {
    expect(extractor.canHandle(candidate)).toBe(expected);
  };

  // Links the extractor must accept.
  [
    "https://www.dropbox.com/s/abc123/video.mp4?dl=0",
    "https://dropbox.com/s/xyz789/file.mp4",
    "https://www.dropbox.com/sh/abc/def/video.mov",
  ].forEach((url) => {
    test(`matches: ${url}`, handles(url, true));
  });

  // Links the extractor must leave to other extractors.
  [
    "https://www.youtube.com/watch?v=abc",
    "https://dropbox.com/",
    "https://dropbox.com/home",
    "https://notdropbox.com/s/abc",
  ].forEach((url) => {
    test(`rejects: ${url}`, handles(url, false));
  });

  test("has correct extractor name", () => {
    const name = extractor._NAME;
    expect(name).toBe("dropbox");
  });
});
814
+
815
/**
 * Extraction suite for DropboxExtractor.
 *
 * Network access is replaced by a stubbed `global.fetch`; the stub is always
 * removed again so it cannot leak into tests that run afterwards.
 */
describe("DropboxExtractor parsing", () => {
  test("converts share URL to direct download URL", async () => {
    const extractor = new DropboxExtractor();

    // Remember the real fetch: overwriting a global without restoring it
    // would silently change the behavior of every later test in the process.
    const originalFetch = global.fetch;

    // Every request is answered with an empty-body 200 response that
    // advertises an MP4 payload of 52428800 bytes (50 MiB).
    global.fetch = mock(() =>
      Promise.resolve(
        new Response(null, {
          status: 200,
          headers: {
            "content-type": "video/mp4",
            "content-length": "52428800",
          },
        })
      )
    ) as typeof fetch;

    try {
      const info = await extractor.extract("https://www.dropbox.com/s/abc123/my-video.mp4?dl=0");

      expect(info.id).toBeDefined();
      // The expected title is the file name without extension, with the dash shown as a space.
      expect(info.title).toBe("my video");
      expect(info.formats).toBeDefined();
      // The share link was requested with dl=0; the resulting format URL must carry dl=1.
      expect(info.formats![0].url).toContain("dl=1");
      expect(info.formats![0].ext).toBe("mp4");
    } finally {
      // Restore even when an assertion above throws.
      global.fetch = originalFetch;
    }
  });
});
840
+
841
/**
 * URL-recognition suite for GoogleDriveExtractor.
 *
 * File links in the `/file/d/<id>/view`, `open?id=` and `uc?id=` forms are
 * expected to be claimed. Unrelated sites, the bare Drive root, a
 * spreadsheet link and a `google.com/drive` path are expected to be refused.
 */
describe("GoogleDriveExtractor URL matching", () => {
  const extractor = new GoogleDriveExtractor();

  // Builds a test body asserting that canHandle() answers `expected` for `candidate`.
  const handles = (candidate: string, expected: boolean) => () => {
    expect(extractor.canHandle(candidate)).toBe(expected);
  };

  // Links the extractor must accept.
  [
    "https://drive.google.com/file/d/1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs/view",
    "https://docs.google.com/file/d/1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs/view",
    "https://drive.google.com/open?id=1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs",
    "https://drive.google.com/uc?id=1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs&export=download",
  ].forEach((url) => {
    test(`matches: ${url}`, handles(url, true));
  });

  // Links the extractor must leave to other extractors.
  [
    "https://www.youtube.com/watch?v=abc",
    "https://drive.google.com/",
    "https://docs.google.com/spreadsheets/d/abc/edit",
    "https://google.com/drive/abc",
  ].forEach((url) => {
    test(`rejects: ${url}`, handles(url, false));
  });

  test("has correct extractor name", () => {
    const name = extractor._NAME;
    expect(name).toBe("google-drive");
  });
});
874
+
875
/**
 * Extraction suite for GoogleDriveExtractor.
 *
 * Network access is replaced by a stubbed `global.fetch`; each test removes
 * its stub again so it cannot leak into tests that run afterwards.
 */
describe("GoogleDriveExtractor parsing", () => {
  test("constructs direct download URL for small files", async () => {
    const extractor = new GoogleDriveExtractor();

    // Remember the real fetch: overwriting a global without restoring it
    // would silently change the behavior of every later test in the process.
    const originalFetch = global.fetch;

    // Every request is answered with an empty-body 200 response describing
    // a 10485760-byte (10 MiB) MP4 attachment named "test-video.mp4".
    global.fetch = mock(() =>
      Promise.resolve(
        new Response(null, {
          status: 200,
          headers: {
            "content-type": "video/mp4",
            "content-disposition": 'attachment; filename="test-video.mp4"',
            "content-length": "10485760",
          },
        })
      )
    ) as typeof fetch;

    try {
      const info = await extractor.extract("https://drive.google.com/file/d/1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs/view");

      expect(info.id).toBe("1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs");
      // Title and extension are expected to match the content-disposition file name.
      expect(info.title).toBe("test-video");
      expect(info.ext).toBe("mp4");
      expect(info.formats![0].url).toContain("drive.google.com/uc");
      expect(info.formats![0].url).toContain("export=download");
      expect(info.formats![0].filesize).toBe(10485760);
    } finally {
      // Restore even when an assertion above throws.
      global.fetch = originalFetch;
    }
  });

  test("handles virus scan confirm page for large files", async () => {
    const extractor = new GoogleDriveExtractor();

    // HTML interstitial served in place of the file; it carries the
    // `confirm=t` links the test expects to end up in the format URL.
    const virusScanHtml = `<html><body>
      <form action="/uc?id=BIGFILE&export=download&confirm=t&uuid=abc123">
        <a href="/uc?id=BIGFILE&export=download&confirm=t">Download anyway</a>
      </form>
      <p>Google Drive can't scan this file for viruses.</p>
      <a href="?confirm=t&id=BIGFILE">Download</a>
    </body></html>`;

    const originalFetch = global.fetch;

    // The first request receives the HTML interstitial; every later request
    // receives empty-body headers for a 1073741824-byte (1 GiB) MP4 attachment.
    let callCount = 0;
    global.fetch = mock(() => {
      callCount++;
      if (callCount === 1) {
        return Promise.resolve(
          new Response(virusScanHtml, {
            status: 200,
            headers: { "content-type": "text/html" },
          })
        );
      }
      return Promise.resolve(
        new Response(null, {
          status: 200,
          headers: {
            "content-type": "video/mp4",
            "content-disposition": 'attachment; filename="large-file.mp4"',
            "content-length": "1073741824",
          },
        })
      );
    }) as typeof fetch;

    try {
      const info = await extractor.extract("https://drive.google.com/file/d/BIGFILE/view");

      expect(info.id).toBe("BIGFILE");
      expect(info.ext).toBe("mp4");
      expect(info.formats![0].url).toContain("confirm=t");
    } finally {
      // Restore even when an assertion above throws.
      global.fetch = originalFetch;
    }
  });
});