getraw 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitattributes +4 -0
- package/CLAUDE.md +57 -0
- package/README.md +166 -0
- package/RESEARCH.md +109 -0
- package/STATUS.md +23 -0
- package/bun.lock +50 -0
- package/bunfig.toml +3 -0
- package/docs/plugin-guide.md +166 -0
- package/docs/supported-sites.md +41 -0
- package/package.json +30 -0
- package/src/cli/index.ts +52 -0
- package/src/cli/options.ts +97 -0
- package/src/core/format-sorter.ts +208 -0
- package/src/core/logger.ts +101 -0
- package/src/core/orchestrator.ts +140 -0
- package/src/core/output-template.ts +58 -0
- package/src/core/types.ts +237 -0
- package/src/downloaders/base.ts +25 -0
- package/src/downloaders/dash.ts +287 -0
- package/src/downloaders/fragment.ts +226 -0
- package/src/downloaders/hls.ts +170 -0
- package/src/downloaders/http.ts +260 -0
- package/src/extractors/archive-org.ts +126 -0
- package/src/extractors/bandcamp.ts +130 -0
- package/src/extractors/base.ts +29 -0
- package/src/extractors/bilibili/bangumi.ts +205 -0
- package/src/extractors/bilibili/index.ts +233 -0
- package/src/extractors/bilibili/wbi.ts +60 -0
- package/src/extractors/coub.ts +137 -0
- package/src/extractors/dailymotion.ts +99 -0
- package/src/extractors/dropbox.ts +52 -0
- package/src/extractors/generic.ts +118 -0
- package/src/extractors/google-drive.ts +106 -0
- package/src/extractors/imgur.ts +156 -0
- package/src/extractors/instagram/index.ts +263 -0
- package/src/extractors/instagram/reels.ts +166 -0
- package/src/extractors/kick/clips.ts +91 -0
- package/src/extractors/kick/index.ts +118 -0
- package/src/extractors/kick/live.ts +89 -0
- package/src/extractors/niconico/index.ts +209 -0
- package/src/extractors/odysee.ts +126 -0
- package/src/extractors/peertube.ts +143 -0
- package/src/extractors/reddit/gallery.ts +124 -0
- package/src/extractors/reddit/index.ts +203 -0
- package/src/extractors/rumble.ts +127 -0
- package/src/extractors/soundcloud/index.ts +161 -0
- package/src/extractors/soundcloud/playlist.ts +129 -0
- package/src/extractors/spotify.ts +97 -0
- package/src/extractors/streamable.ts +121 -0
- package/src/extractors/ted.ts +151 -0
- package/src/extractors/tiktok/index.ts +207 -0
- package/src/extractors/tiktok/user.ts +176 -0
- package/src/extractors/twitch/clips.ts +125 -0
- package/src/extractors/twitch/index.ts +136 -0
- package/src/extractors/twitch/live.ts +132 -0
- package/src/extractors/twitter/index.ts +140 -0
- package/src/extractors/twitter/spaces.ts +200 -0
- package/src/extractors/vimeo/index.ts +187 -0
- package/src/extractors/youtube/captions.ts +111 -0
- package/src/extractors/youtube/index.ts +252 -0
- package/src/extractors/youtube/innertube.ts +364 -0
- package/src/extractors/youtube/nsig.ts +105 -0
- package/src/extractors/youtube/playlist.ts +227 -0
- package/src/extractors/youtube/signature.ts +163 -0
- package/src/networking/client.ts +311 -0
- package/src/networking/cookies.ts +138 -0
- package/src/networking/proxy.ts +132 -0
- package/src/networking/tls.ts +67 -0
- package/src/networking/user-agents.ts +88 -0
- package/src/postprocessors/base.ts +44 -0
- package/src/postprocessors/extract-audio.ts +98 -0
- package/src/postprocessors/ffmpeg.ts +146 -0
- package/src/postprocessors/merge.ts +102 -0
- package/src/postprocessors/metadata.ts +73 -0
- package/src/postprocessors/sponsorblock.ts +162 -0
- package/src/postprocessors/subtitles.ts +285 -0
- package/src/postprocessors/thumbnails.ts +194 -0
- package/src/utils/sanitize.ts +36 -0
- package/src/utils/traverse.ts +68 -0
- package/tests/core/format-sorter.test.ts +96 -0
- package/tests/core/output-template.test.ts +56 -0
- package/tests/core/types.test.ts +79 -0
- package/tests/unit/downloaders/dash.test.ts +57 -0
- package/tests/unit/downloaders/hls.test.ts +120 -0
- package/tests/unit/downloaders/http.test.ts +114 -0
- package/tests/unit/extractors/bilibili.test.ts +83 -0
- package/tests/unit/extractors/instagram.test.ts +273 -0
- package/tests/unit/extractors/kick.test.ts +85 -0
- package/tests/unit/extractors/misc.test.ts +942 -0
- package/tests/unit/extractors/niconico.test.ts +61 -0
- package/tests/unit/extractors/reddit.test.ts +222 -0
- package/tests/unit/extractors/soundcloud.test.ts +299 -0
- package/tests/unit/extractors/tiktok.test.ts +260 -0
- package/tests/unit/extractors/twitch.test.ts +250 -0
- package/tests/unit/extractors/twitter.test.ts +181 -0
- package/tests/unit/extractors/vimeo.test.ts +253 -0
- package/tests/unit/extractors/youtube.test.ts +259 -0
- package/tests/unit/networking/client.test.ts +272 -0
- package/tests/unit/networking/cookies.test.ts +256 -0
- package/tests/unit/networking/proxy.test.ts +137 -0
- package/tests/unit/postprocessors/extract-audio.test.ts +63 -0
- package/tests/unit/postprocessors/merge.test.ts +61 -0
- package/tests/unit/postprocessors/subtitles.test.ts +89 -0
- package/tools/dashboard.ts +112 -0
- package/tsconfig.json +17 -0
package/STATUS.md
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# dlpx — Agent Status Board
|
|
2
|
+
|
|
3
|
+
Last updated: 2026-06-16
|
|
4
|
+
|
|
5
|
+
| # | Agent | Scope | Status | Notes |
|
|
6
|
+
|---|-------|-------|--------|-------|
|
|
7
|
+
| 1 | Scaffold | Project structure, types, core infra | DONE | Foundation complete |
|
|
8
|
+
| 2 | YouTube Extractor | YouTube video/playlist extraction | DONE | InnerTube API, sig/nsig decipher, captions, playlists, age-gate bypass, 25 tests passing |
|
|
9
|
+
| 3 | Twitter/X Extractor | Twitter/X video extraction | PENDING | |
|
|
10
|
+
| 4 | Instagram Extractor | Instagram reels/stories/posts | PENDING | |
|
|
11
|
+
| 5 | TikTok Extractor | TikTok video extraction | PENDING | |
|
|
12
|
+
| 6 | Reddit Extractor | Reddit video/gallery extraction | PENDING | |
|
|
13
|
+
| 7 | Twitch Extractor | Twitch VOD/clip extraction | PENDING | |
|
|
14
|
+
| 8 | SoundCloud Extractor | SoundCloud audio extraction | PENDING | |
|
|
15
|
+
| 9 | HLS Downloader | m3u8/HLS stream downloading | DONE | AES-128 decrypt, concurrent segments, master playlist variant selection |
|
|
16
|
+
| 10 | DASH Downloader | MPD/DASH stream downloading | DONE | SegmentTemplate+SegmentList, multi-period, concurrent segments |
|
|
17
|
+
| 11 | FFmpeg PostProcessor | Audio extract, merge, embed | PENDING | |
|
|
18
|
+
| 12 | Subtitle PostProcessor | Subtitle download/convert/embed | PENDING | |
|
|
19
|
+
| 13 | Networking | HTTP client, cookies, auth | PENDING | |
|
|
20
|
+
| 14 | Plugins | Plugin loader and API | PENDING | |
|
|
21
|
+
| 15 | Integration Tests | Cross-module integration tests | PENDING | |
|
|
22
|
+
| 16 | E2E Tests | End-to-end download tests | PENDING | |
|
|
23
|
+
| 17 | Polish & Release | CLI polish, README, binary build | PENDING | |
|
package/bun.lock
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
{
|
|
2
|
+
"lockfileVersion": 1,
|
|
3
|
+
"configVersion": 1,
|
|
4
|
+
"workspaces": {
|
|
5
|
+
"": {
|
|
6
|
+
"name": "dlpx",
|
|
7
|
+
"dependencies": {
|
|
8
|
+
"hls-parser": "^0.13.6",
|
|
9
|
+
"mpd-parser": "^1.3.0",
|
|
10
|
+
},
|
|
11
|
+
"devDependencies": {
|
|
12
|
+
"@types/bun": "latest",
|
|
13
|
+
},
|
|
14
|
+
"peerDependencies": {
|
|
15
|
+
"typescript": "^5",
|
|
16
|
+
},
|
|
17
|
+
},
|
|
18
|
+
},
|
|
19
|
+
"packages": {
|
|
20
|
+
"@babel/runtime": ["@babel/runtime@7.29.7", "", {}, "sha512-Nq8OhGWiZIZGV6hLHoyAKLLcJihP/xFeBMGJoUrxTX2psI8dCifzLhZISFb+VWS3wFMRDmCGw5R+dOySCqPLhw=="],
|
|
21
|
+
|
|
22
|
+
"@types/bun": ["@types/bun@1.3.14", "", { "dependencies": { "bun-types": "1.3.14" } }, "sha512-h1hFqFVcvAvD9j9K7ZW7vd82aSA+rTdznZa+5bwvCwqSB1jmmfLcbIWhOLx1/+boy/xmjgCs/OMUL8hRJSmnPw=="],
|
|
23
|
+
|
|
24
|
+
"@types/node": ["@types/node@25.9.3", "", { "dependencies": { "undici-types": ">=7.24.0 <7.24.7" } }, "sha512-603BddQMv3pUcr4U2dhujk83N2tTDVr/34wII2B6bJy6g+8WD6yUb11jszNs0gdi4PesVWl7ABt8nYMVpnLUcg=="],
|
|
25
|
+
|
|
26
|
+
"@videojs/vhs-utils": ["@videojs/vhs-utils@4.0.0", "", { "dependencies": { "@babel/runtime": "^7.12.5", "global": "^4.4.0", "url-toolkit": "^2.2.1" } }, "sha512-xJp7Yd4jMLwje2vHCUmi8MOUU76nxiwII3z4Eg3Ucb+6rrkFVGosrXlMgGnaLjq724j3wzNElRZ71D/CKrTtxg=="],
|
|
27
|
+
|
|
28
|
+
"@xmldom/xmldom": ["@xmldom/xmldom@0.8.13", "", {}, "sha512-KRYzxepc14G/CEpEGc3Yn+JKaAeT63smlDr+vjB8jRfgTBBI9wRj/nkQEO+ucV8p8I9bfKLWp37uHgFrbntPvw=="],
|
|
29
|
+
|
|
30
|
+
"bun-types": ["bun-types@1.3.14", "", { "dependencies": { "@types/node": "*" } }, "sha512-4N0ig0fEomHt5R0KCFWjovxow98rIoRwKolrYdCcknNwMekCXRnWEUvgu5soYV8QXtVsrUD8B95MBOZGPvr6KQ=="],
|
|
31
|
+
|
|
32
|
+
"dom-walk": ["dom-walk@0.1.2", "", {}, "sha512-6QvTW9mrGeIegrFXdtQi9pk7O/nSK6lSdXW2eqUspN5LWD7UTji2Fqw5V2YLjBpHEoU9Xl/eUWNpDeZvoyOv2w=="],
|
|
33
|
+
|
|
34
|
+
"global": ["global@4.4.0", "", { "dependencies": { "min-document": "^2.19.0", "process": "^0.11.10" } }, "sha512-wv/LAoHdRE3BeTGz53FAamhGlPLhlssK45usmGFThIi4XqnBmjKQ16u+RNbP7WvigRZDxUsM0J3gcQ5yicaL0w=="],
|
|
35
|
+
|
|
36
|
+
"hls-parser": ["hls-parser@0.13.6", "", {}, "sha512-I40sl22E2muqeSTpG8kMN2dAegAhubkXPXtnsUXFwdKwZK47d1Q+XwuX32VMZ++AZU5oeQIZqAnGNHxSG1sWaw=="],
|
|
37
|
+
|
|
38
|
+
"min-document": ["min-document@2.19.2", "", { "dependencies": { "dom-walk": "^0.1.0" } }, "sha512-8S5I8db/uZN8r9HSLFVWPdJCvYOejMcEC82VIzNUc6Zkklf/d1gg2psfE79/vyhWOj4+J8MtwmoOz3TmvaGu5A=="],
|
|
39
|
+
|
|
40
|
+
"mpd-parser": ["mpd-parser@1.3.1", "", { "dependencies": { "@babel/runtime": "^7.12.5", "@videojs/vhs-utils": "^4.0.0", "@xmldom/xmldom": "^0.8.3", "global": "^4.4.0" }, "bin": { "mpd-to-m3u8-json": "bin/parse.js" } }, "sha512-1FuyEWI5k2HcmhS1HkKnUAQV7yFPfXPht2DnRRGtoiiAAW+ESTbtEXIDpRkwdU+XyrQuwrIym7UkoPKsZ0SyFw=="],
|
|
41
|
+
|
|
42
|
+
"process": ["process@0.11.10", "", {}, "sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A=="],
|
|
43
|
+
|
|
44
|
+
"typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
|
|
45
|
+
|
|
46
|
+
"undici-types": ["undici-types@7.24.6", "", {}, "sha512-WRNW+sJgj5OBN4/0JpHFqtqzhpbnV0GuB+OozA9gCL7a993SmU+1JBZCzLNxYsbMfIeDL+lTsphD5jN5N+n0zg=="],
|
|
47
|
+
|
|
48
|
+
"url-toolkit": ["url-toolkit@2.2.5", "", {}, "sha512-mtN6xk+Nac+oyJ/PrI7tzfmomRVNFIWKUbG8jdYFt52hxbiReFAXIjYskvu64/dvuW71IcB7lV8l0HvZMac6Jg=="],
|
|
49
|
+
}
|
|
50
|
+
}
|
package/docs/plugin-guide.md
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# Plugin Guide — Writing a Custom Extractor
|
|
2
|
+
|
|
3
|
+
dlpx extractors are classes that extend `BaseExtractor` from `src/core/types.ts`. Once written, they are registered via `registerExtractor` from `src/extractors/base.ts`. dlpx then tries each registered extractor in order before falling back to the generic extractor.
|
|
4
|
+
|
|
5
|
+
## BaseExtractor Interface
|
|
6
|
+
|
|
7
|
+
```ts
|
|
8
|
+
// src/core/types.ts
|
|
9
|
+
|
|
10
|
+
export abstract class BaseExtractor {
|
|
11
|
+
// Regex tested against the URL to decide if this extractor handles it.
|
|
12
|
+
abstract readonly _VALID_URL: RegExp;
|
|
13
|
+
|
|
14
|
+
// Human-readable name used in logs and the extractor field of InfoDict.
|
|
15
|
+
abstract readonly _NAME: string;
|
|
16
|
+
|
|
17
|
+
// Returns true when _VALID_URL matches the URL. Override to add custom logic.
|
|
18
|
+
canHandle(url: string): boolean {
|
|
19
|
+
return this._VALID_URL.test(url);
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
// Public entry point. Calls _real_extract and stamps extractor metadata.
|
|
23
|
+
async extract(url: string): Promise<InfoDict> { ... }
|
|
24
|
+
|
|
25
|
+
// Your implementation goes here.
|
|
26
|
+
protected abstract _real_extract(url: string): Promise<InfoDict>;
|
|
27
|
+
}
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
`_real_extract` must return an `InfoDict`. The key fields are:
|
|
31
|
+
|
|
32
|
+
```ts
|
|
33
|
+
interface InfoDict {
|
|
34
|
+
id: string; // required — unique ID for the media
|
|
35
|
+
title: string; // required
|
|
36
|
+
formats?: Format[]; // list of available streams; mutually exclusive with url
|
|
37
|
+
url?: string; // use this if there is exactly one stream URL
|
|
38
|
+
ext?: string; // file extension when using url
|
|
39
|
+
thumbnails?: Thumbnail[];
|
|
40
|
+
subtitles?: Record<string, Subtitle[]>;
|
|
41
|
+
automatic_captions?: Record<string, Subtitle[]>;
|
|
42
|
+
description?: string;
|
|
43
|
+
uploader?: string;
|
|
44
|
+
duration?: number; // seconds
|
|
45
|
+
// ... see src/core/types.ts for the full list
|
|
46
|
+
}
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Each `Format` describes one downloadable stream:
|
|
50
|
+
|
|
51
|
+
```ts
|
|
52
|
+
interface Format {
|
|
53
|
+
format_id: string; // required
|
|
54
|
+
url: string; // required
|
|
55
|
+
ext: string; // required — e.g. "mp4", "m4a", "webm"
|
|
56
|
+
protocol?: string; // "https", "m3u8", "dash" — controls which downloader is used
|
|
57
|
+
width?: number;
|
|
58
|
+
height?: number;
|
|
59
|
+
fps?: number;
|
|
60
|
+
vcodec?: string; // "avc1.…", "vp9", "none" (audio-only)
|
|
61
|
+
acodec?: string; // "mp4a.…", "opus", "none" (video-only)
|
|
62
|
+
vbr?: number; // video bitrate kbps
|
|
63
|
+
abr?: number; // audio bitrate kbps
|
|
64
|
+
tbr?: number; // total bitrate kbps
|
|
65
|
+
filesize?: number;
|
|
66
|
+
http_headers?: Record<string, string>;
|
|
67
|
+
}
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Minimal Example
|
|
71
|
+
|
|
72
|
+
```ts
|
|
73
|
+
// src/extractors/example.ts
|
|
74
|
+
|
|
75
|
+
import { BaseExtractor, ExtractorError } from "../core/types";
|
|
76
|
+
import type { InfoDict } from "../core/types";
|
|
77
|
+
|
|
78
|
+
export class ExampleExtractor extends BaseExtractor {
|
|
79
|
+
readonly _VALID_URL = /https?:\/\/(?:www\.)?example\.com\/watch\/([a-zA-Z0-9]+)/;
|
|
80
|
+
readonly _NAME = "example";
|
|
81
|
+
|
|
82
|
+
protected async _real_extract(url: string): Promise<InfoDict> {
|
|
83
|
+
const match = this._VALID_URL.exec(url);
|
|
84
|
+
if (!match) throw new ExtractorError(`example: invalid URL: ${url}`);
|
|
85
|
+
const videoId = match[1];
|
|
86
|
+
|
|
87
|
+
// Fetch the page or an API endpoint to find the stream URL.
|
|
88
|
+
const apiUrl = `https://example.com/api/videos/${videoId}`;
|
|
89
|
+
const resp = await fetch(apiUrl, {
|
|
90
|
+
headers: { "User-Agent": "Mozilla/5.0" },
|
|
91
|
+
});
|
|
92
|
+
if (!resp.ok) {
|
|
93
|
+
throw new ExtractorError(`example: API returned ${resp.status}`);
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
const data = (await resp.json()) as {
|
|
97
|
+
title: string;
|
|
98
|
+
mp4_url: string;
|
|
99
|
+
duration_s: number;
|
|
100
|
+
};
|
|
101
|
+
|
|
102
|
+
return {
|
|
103
|
+
id: videoId,
|
|
104
|
+
title: data.title,
|
|
105
|
+
url: data.mp4_url,
|
|
106
|
+
ext: "mp4",
|
|
107
|
+
duration: data.duration_s,
|
|
108
|
+
extractor: this._NAME,
|
|
109
|
+
};
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
If the site provides multiple quality levels, return them as `formats` instead of a single `url`:
|
|
115
|
+
|
|
116
|
+
```ts
|
|
117
|
+
return {
|
|
118
|
+
id: videoId,
|
|
119
|
+
title: data.title,
|
|
120
|
+
formats: [
|
|
121
|
+
{
|
|
122
|
+
format_id: "1080p",
|
|
123
|
+
url: data.hd_url,
|
|
124
|
+
ext: "mp4",
|
|
125
|
+
width: 1920,
|
|
126
|
+
height: 1080,
|
|
127
|
+
vcodec: "avc1",
|
|
128
|
+
acodec: "mp4a",
|
|
129
|
+
},
|
|
130
|
+
{
|
|
131
|
+
format_id: "480p",
|
|
132
|
+
url: data.sd_url,
|
|
133
|
+
ext: "mp4",
|
|
134
|
+
width: 854,
|
|
135
|
+
height: 480,
|
|
136
|
+
vcodec: "avc1",
|
|
137
|
+
acodec: "mp4a",
|
|
138
|
+
},
|
|
139
|
+
],
|
|
140
|
+
};
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Registering the Extractor
|
|
144
|
+
|
|
145
|
+
Import `registerExtractor` from `src/extractors/base.ts` and call it before the orchestrator runs. The standard place is an index file that is imported by the entry point:
|
|
146
|
+
|
|
147
|
+
```ts
|
|
148
|
+
// src/extractors/index.ts (or wherever you collect registrations)
|
|
149
|
+
|
|
150
|
+
import { registerExtractor } from "./base";
|
|
151
|
+
import { ExampleExtractor } from "./example";
|
|
152
|
+
|
|
153
|
+
registerExtractor(new ExampleExtractor());
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Extractors are tested in registration order. Place more-specific extractors before broader ones. The `generic` extractor is always the last fallback and cannot be displaced.
|
|
157
|
+
|
|
158
|
+
## Error Handling
|
|
159
|
+
|
|
160
|
+
Throw `ExtractorError` (from `src/core/types.ts`) for expected failures (unsupported URL shape, API error, content not found). Any other thrown error is automatically wrapped in an `ExtractorError` by the base class before propagating.
|
|
161
|
+
|
|
162
|
+
```ts
|
|
163
|
+
import { ExtractorError } from "../core/types";
|
|
164
|
+
|
|
165
|
+
throw new ExtractorError("example: video is private");
|
|
166
|
+
```
|
package/docs/supported-sites.md
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Supported Sites
|
|
2
|
+
|
|
3
|
+
All extractors implement `BaseExtractor` from `src/core/types.ts`. The `_VALID_URL` regex for each is listed below — dlpx tests URLs against these in registration order, falling back to the `generic` extractor for any `http(s)://` URL.
|
|
4
|
+
|
|
5
|
+
| # | Extractor name | Site | URL pattern | Formats | Notes |
|
|
6
|
+
|---|---------------|------|-------------|---------|-------|
|
|
7
|
+
| 1 | `youtube` | YouTube | `(www\|m\|music).youtube.com/watch?v=`, `youtu.be/<id>`, `youtube.com/shorts/<id>`, `youtube.com/live/<id>`, `youtube.com/embed/<id>`, `youtube.com/v/<id>`, `youtube.com/playlist?list=`, `youtube.com/channel/<id>`, `youtube.com/@<handle>` | DASH (MP4 video + M4A audio), HLS | Subtitles (manual) and auto-generated captions extracted. Age-gated videos attempt TV client bypass. Signature and n-sig deciphering implemented. Playlist and channel enumeration supported. |
|
|
8
|
+
| 2 | `vimeo` | Vimeo | `(www\|player).vimeo.com/<id>`, `vimeo.com/channels/<c>/<id>`, `vimeo.com/groups/<g>/videos/<id>` | HLS, DASH, Progressive MP4 | All CDN variants exposed as separate formats. |
|
|
9
|
+
| 3 | `twitter` | Twitter / X | `(www.)twitter.com/<user>/status/<id>`, `(www.)x.com/<user>/status/<id>` | MP4 (multiple bitrates), HLS | Uses the public syndication API — no auth required. |
|
|
10
|
+
| 4 | `twitter:spaces` | Twitter Spaces | `(www.)twitter.com/i/spaces/<id>`, `(www.)x.com/i/spaces/<id>` | HLS (M3U8) | Uses a guest token obtained from the public API. Spaces must be public. |
|
|
11
|
+
| 5 | `tiktok` | TikTok video | `(www.)tiktok.com/@<user>/video/<id>`, `vm.tiktok.com/<id>` | MP4 (playAddr / downloadAddr) | Parses hydration JSON embedded in the page HTML. |
|
|
12
|
+
| 6 | `tiktok:user` | TikTok user feed | `(www.)tiktok.com/@<username>` | Playlist (entries) | Returns a playlist of video URLs from the user's public profile page. |
|
|
13
|
+
| 7 | `instagram` | Instagram post / reel | `(www.)instagram.com/p/<id>`, `instagram.com/reel/<id>`, `instagram.com/reels/<id>` | MP4 | Parses `__additionalDataLoaded` and `window.__additionalData` from page. Sidecar (multi-image) posts extracted as playlist. |
|
|
14
|
+
| 8 | `instagram:reels` | Instagram Reels feed | `(www.)instagram.com/reels/` | MP4 | Fetches via the internal GraphQL endpoint (`PolarisClipsHomePageQuery`). Returns a playlist. |
|
|
15
|
+
| 9 | `twitch:vod` | Twitch VOD | `(www.)twitch.tv/videos/<id>` | HLS (M3U8, multiple quality levels) | Uses Twitch GQL to get a signed playback access token. |
|
|
16
|
+
| 10 | `twitch:clip` | Twitch Clip | `(www.)twitch.tv/<channel>/clip/<slug>`, `clips.twitch.tv/<slug>` | MP4 (multiple quality levels) | Signed MP4 URLs fetched via GQL. |
|
|
17
|
+
| 11 | `twitch:live` | Twitch Live stream | `(www.)twitch.tv/<channel>` | HLS (M3U8, multiple quality levels) | Errors if channel is offline. Signed via GQL stream playback access token. |
|
|
18
|
+
| 12 | `kick` | Kick VOD | `(www.)kick.com/video/<id>` | HLS (M3U8) | Uses the Kick v1 public API. |
|
|
19
|
+
| 13 | `kick:clips` | Kick Clip | `(www.)kick.com/<channel>/clips/<id>` | MP4 | Uses the Kick v1 clips API. |
|
|
20
|
+
| 14 | `kick:live` | Kick Live stream | `(www.)kick.com/<channel>` | HLS (M3U8) | Errors if channel is not live. |
|
|
21
|
+
| 15 | `reddit` | Reddit video post | `(www.\|old.)reddit.com/r/<sub>/comments/<id>`, `v.redd.it/<id>` | DASH (video) + separate audio URL, MP4 fallback | Audio is extracted from the DASH manifest and presented as a separate format so both can be merged. |
|
|
22
|
+
| 16 | `reddit:gallery` | Reddit gallery post | `(www.\|old.)reddit.com/r/<sub>/comments/<id>`, `reddit.com/gallery/<id>` | Images (JPEG/PNG) and MP4 (gallery items) | Each gallery item is returned as a separate entry in a playlist. |
|
|
23
|
+
| 17 | `soundcloud` | SoundCloud track | `(www.\|m.)soundcloud.com/<user>/<track>` | HLS (M3U8 opus/mp3), HTTP progressive MP3 | client_id is extracted dynamically from the JS bundle. |
|
|
24
|
+
| 18 | `soundcloud:playlist` | SoundCloud set / playlist | `(www.\|m.)soundcloud.com/<user>/sets/<playlist>` | Playlist (entries) | Paginates via `api-v2.soundcloud.com/playlists/<id>/tracks`. |
|
|
25
|
+
| 19 | `bilibili` | Bilibili video | `(www.)bilibili.com/video/BV<id>`, `bilibili.com/video/av<id>` | DASH (video + audio, multiple quality levels from 360p to 8K/HDR/Dolby) | Requires login cookies for 1080p+ qualities. BV-to-AV ID conversion implemented. |
|
|
26
|
+
| 20 | `bilibili:bangumi` | Bilibili Bangumi (anime/series) | `(www.)bilibili.com/bangumi/play/ep<id>`, `bilibili.com/bangumi/play/ss<id>` | DASH (same quality levels as bilibili) | `ep_id` fetches a single episode; `ss_id` fetches the full season as a playlist. |
|
|
27
|
+
| 21 | `niconico` | Niconico | `(www.)nicovideo.jp/watch/sm<id>`, `nicovideo.jp/watch/nm<id>` | HLS (DMS session-based M3U8) | Session cookies from the watch page are forwarded to the DMS API. |
|
|
28
|
+
| 22 | `bandcamp` | Bandcamp track / album | `<artist>.bandcamp.com/track/<slug>`, `<artist>.bandcamp.com/album/<slug>`, `bandcamp.com/EmbeddedPlayer/*` | MP3 (stream URL from `trackinfo` JSON) | Album URLs return a playlist of tracks. |
|
|
29
|
+
| 23 | `dailymotion` | Dailymotion | `(www.)dailymotion.com/video/<id>` | HLS (M3U8) | Uses the Dailymotion player metadata API. |
|
|
30
|
+
| 24 | `streamable` | Streamable | `(www.)streamable.com/<id>` | MP4 (multiple resolutions), HLS | Parses the player JSON from `api.streamable.com/videos/<id>`. |
|
|
31
|
+
| 25 | `coub` | Coub | `(www.)coub.com/view/<id>`, `coub.com/embed/<id>` | MP4 (video), MP3 (audio), GIF | Video loop and audio are separate; both formats are exposed. |
|
|
32
|
+
| 26 | `imgur` | Imgur | `imgur.com/<id>`, `imgur.com/a/<id>` (album), `imgur.com/gallery/<id>`, `i.imgur.com/<id>.<ext>` | MP4, GIF (converted), JPEG/PNG | Direct image/video links, single items, and albums all handled. Albums return a playlist. |
|
|
33
|
+
| 27 | `rumble` | Rumble | `(www.)rumble.com/v<slug>.html`, `rumble.com/embed/<id>` | MP4 (multiple resolutions) | Parses the embedded player JSON. |
|
|
34
|
+
| 28 | `odysee` | Odysee / LBRY | `(www.)odysee.com/@<channel>:<tag>/<slug>`, `lbry.tv/@<channel>:<tag>/<slug>` | MP4, HLS (if present) | Uses the Odysee API v3 to resolve claim URLs. |
|
|
35
|
+
| 29 | `ted` | TED Talks | `(www.)ted.com/talks/<slug>` | MP4 (multiple qualities via `playerData.resources.h264`) | Subtitles extracted from `subtitledDownloads` (SRT). |
|
|
36
|
+
| 30 | `peertube` | PeerTube (any instance) | `<host>/videos/watch/<uuid>`, `<host>/w/<uuid>`, `<host>/videos/embed/<uuid>` | WebTorrent (MP4), HLS | Subtitles (VTT) extracted from the captions API. Works with any PeerTube instance. |
|
|
37
|
+
| 31 | `google-drive` | Google Drive | `drive.google.com/file/d/<id>`, `docs.google.com/file/d/<id>`, `drive.google.com/uc?id=<id>` | MP4 (itag-based URLs) | Public files only. Parses the Drive streaming page for itag format entries. |
|
|
38
|
+
| 32 | `dropbox` | Dropbox | `(www.)dropbox.com/s/<id>`, `dropbox.com/sh/<id>`, `dropbox.com/scl/fo/<id>` | Direct download URL (any type) | Rewrites the `dl=0` query parameter to `dl=1` for direct download. |
|
|
39
|
+
| 33 | `archive.org` | Internet Archive | `(www.)archive.org/details/<id>`, `archive.org/download/<id>` | MP4, OGV, WEBM, MP3, and any other files hosted on the item | All media files listed under the item are returned as formats. |
|
|
40
|
+
| 34 | `spotify` | Spotify Podcast | `open.spotify.com/episode/<id>` | MP3 (30-second preview only) | Full episode audio requires Spotify auth, which is not implemented. Preview URL is from `previewUrl` in the episode API. |
|
|
41
|
+
| 35 | `generic` | Generic fallback | Any `http://` or `https://` URL | Depends on the target | Fetches the page and looks for `<video src>`, `<source src>`, and `og:video` meta tags. Last resort. |
|
package/package.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "getraw",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Fast media downloader CLI built natively in Bun/TypeScript",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"dlpx": "./src/cli/index.ts"
|
|
8
|
+
},
|
|
9
|
+
"scripts": {
|
|
10
|
+
"dev": "bun run src/cli/index.ts",
|
|
11
|
+
"test": "bun test",
|
|
12
|
+
"build": "bun build src/cli/index.ts --compile --outfile=dlpx",
|
|
13
|
+
"dashboard": "bun run tools/dashboard.ts"
|
|
14
|
+
},
|
|
15
|
+
"dependencies": {
|
|
16
|
+
"hls-parser": "^0.13.6",
|
|
17
|
+
"mpd-parser": "^1.3.0"
|
|
18
|
+
},
|
|
19
|
+
"devDependencies": {
|
|
20
|
+
"@types/bun": "latest"
|
|
21
|
+
},
|
|
22
|
+
"peerDependencies": {
|
|
23
|
+
"typescript": "^5"
|
|
24
|
+
},
|
|
25
|
+
"license": "MIT",
|
|
26
|
+
"repository": {
|
|
27
|
+
"type": "git",
|
|
28
|
+
"url": "https://github.com/web3mikee/dlpx"
|
|
29
|
+
}
|
|
30
|
+
}
|
package/src/cli/index.ts
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
import { parseArgs, printHelp } from "./options";
|
|
3
|
+
import { Orchestrator } from "../core/orchestrator";
|
|
4
|
+
import { logger } from "../core/logger";
|
|
5
|
+
|
|
6
|
+
// Version string printed by `--version`. Must match the "version" field of package.json.
const VERSION = "0.1.0";
|
|
7
|
+
|
|
8
|
+
/**
 * CLI entry point.
 *
 * Parses the command line, handles the informational flags (`--help`,
 * `--version`), configures the logger, and then hands each URL to the
 * orchestrator in the order given. The process exits with status 1 on the
 * first failure (bad command line, no URLs, or a URL that fails to process).
 */
async function main(): Promise<void> {
  let options: ReturnType<typeof parseArgs>;
  try {
    options = parseArgs(process.argv.slice(2));
  } catch (err) {
    // parseArgs throws on a malformed command line (e.g. a flag that is
    // missing its value). Report it as a normal CLI error rather than letting
    // it escape as an unhandled promise rejection.
    logger.error(err instanceof Error ? err.message : String(err));
    process.exit(1);
  }

  if (options.help) {
    printHelp();
    process.exit(0);
  }

  if (options.version) {
    process.stdout.write(`dlpx ${VERSION}\n`);
    process.exit(0);
  }

  if (options.verbose) {
    logger.setLevel("debug");
  }

  if (options.quiet) {
    logger.setQuiet(true);
  }

  if (options.urls.length === 0) {
    logger.error("No URLs provided. Use --help for usage.");
    process.exit(1);
  }

  const orchestrator = new Orchestrator();

  for (const url of options.urls) {
    try {
      await orchestrator.process(url, options);
    } catch (err) {
      logger.error(err instanceof Error ? err.message : String(err));
      // The stack trace is only useful when debugging, so gate it on --verbose.
      if (options.verbose && err instanceof Error && err.stack) {
        logger.debug(err.stack);
      }
      // Fail fast: remaining URLs are not attempted after an error.
      process.exit(1);
    }
  }
}
|
|
51
|
+
|
|
52
|
+
// Start the CLI. main() is async, so attach a rejection handler: anything that
// throws outside main's own try/catch blocks is reported through the logger
// and turned into a non-zero exit status instead of an unhandled rejection.
main().catch((err: unknown) => {
  logger.error(err instanceof Error ? err.message : String(err));
  process.exit(1);
});
|
package/src/cli/options.ts
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import type { Options } from "../core/types";
|
|
2
|
+
import { DEFAULT_OPTIONS } from "../core/types";
|
|
3
|
+
|
|
4
|
+
/** Kinds of value a command-line flag can carry. */
type FlagValueKind = "boolean" | "string" | "number";

/** Declarative description of a single command-line flag. */
interface FlagDef {
  /** Long spelling of the flag, e.g. `--format`. */
  long: string;
  /** Optional short alias, e.g. `-f`. */
  short?: string;
  /** One-line text printed next to the flag in the help output. */
  description: string;
  /** `boolean` flags take no value; `string` and `number` flags consume the next argument. */
  type: FlagValueKind;
  /** Name of the `Options` property that receives the parsed value. */
  key: keyof Options;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Table of every flag the CLI accepts. The argument parser looks flags up
 * here by their long or short spelling, and the help output lists them in
 * this order.
 */
export const FLAG_DEFS: FlagDef[] = [
  // Format selection and output naming
  { long: "--format", short: "-f", description: "Format selection string", type: "string", key: "format" },
  { long: "--output", short: "-o", description: "Output filename template", type: "string", key: "output" },

  // Audio extraction
  { long: "--extract-audio", short: "-x", description: "Extract audio only", type: "boolean", key: "extractAudio" },
  { long: "--audio-format", description: "Audio format (mp3, aac, flac, etc.)", type: "string", key: "audioFormat" },
  { long: "--audio-quality", description: "Audio quality (0-10 or bitrate)", type: "string", key: "audioQuality" },

  // Subtitles
  { long: "--write-subs", description: "Write subtitles to file", type: "boolean", key: "writeSubs" },
  { long: "--sub-langs", description: "Subtitle languages", type: "string", key: "subLangs" },

  // Inspection and logging
  { long: "--list-formats", short: "-F", description: "List available formats", type: "boolean", key: "listFormats" },
  { long: "--dump-json", short: "-j", description: "Dump info JSON to stdout", type: "boolean", key: "dumpJson" },
  { long: "--quiet", short: "-q", description: "Suppress output", type: "boolean", key: "quiet" },
  { long: "--verbose", short: "-v", description: "Verbose output", type: "boolean", key: "verbose" },
  { long: "--no-progress", description: "Disable progress bar", type: "boolean", key: "noProgress" },

  // Networking
  { long: "--retries", short: "-R", description: "Number of retries", type: "number", key: "retries" },
  { long: "--rate-limit", short: "-r", description: "Rate limit in bytes/sec", type: "number", key: "rateLimit" },
  { long: "--proxy", description: "Proxy URL", type: "string", key: "proxy" },
  { long: "--cookies", description: "Cookie file path", type: "string", key: "cookies" },
  { long: "--user-agent", description: "Custom User-Agent", type: "string", key: "userAgent" },
  { long: "--referer", description: "Custom Referer", type: "string", key: "referer" },

  // Post-processing
  { long: "--embed-thumbnail", description: "Embed thumbnail in output", type: "boolean", key: "embedThumbnail" },
  { long: "--embed-subs", description: "Embed subtitles in output", type: "boolean", key: "embedSubs" },
  { long: "--merge-output-format", description: "Output container for merging", type: "string", key: "mergeOutputFormat" },
  { long: "--ffmpeg-location", description: "Path to ffmpeg binary", type: "string", key: "ffmpegLocation" },

  // Informational
  { long: "--version", short: "-V", description: "Print version", type: "boolean", key: "version" },
  { long: "--help", short: "-h", description: "Show help", type: "boolean", key: "help" },
];
|
|
38
|
+
|
|
39
|
+
/**
 * Parse raw command-line arguments into an `Options` object.
 *
 * Starts from a copy of `DEFAULT_OPTIONS` and applies each recognised flag
 * from `FLAG_DEFS`. Arguments that do not start with `-` are collected into
 * `options.urls` in the order given.
 *
 * Supported syntax:
 * - `--flag` / `-f` for boolean flags (sets the option to `true`)
 * - `--flag value` / `-f value` for string and number flags
 * - `--flag=value` for string and number flags
 * - `--` ends option parsing; every later argument is treated as a URL
 *
 * @param args Command-line arguments without the runtime and script path.
 * @returns The populated options.
 * @throws Error if an option is unknown, a value is missing, a boolean flag
 *   is given an inline value, or a numeric flag's value is not a number.
 */
export function parseArgs(args: string[]): Options {
  const options: Options = { ...DEFAULT_OPTIONS, paths: { ...DEFAULT_OPTIONS.paths }, urls: [] };
  // Flags are written through the `key` name from their definition, so view
  // the options object as a plain string-keyed record for those writes.
  const target = options as Record<string, unknown>;

  let i = 0;
  while (i < args.length) {
    const arg = args[i];
    i++;
    if (arg === undefined) continue;

    if (!arg.startsWith("-")) {
      options.urls.push(arg);
      continue;
    }

    if (arg === "--") {
      // Conventional end-of-options marker: the rest are positional URLs.
      options.urls.push(...args.slice(i));
      break;
    }

    // Split `--long=value` into name and inline value. Short flags are not
    // split, so a short alias is always matched as written.
    const eq = arg.startsWith("--") ? arg.indexOf("=") : -1;
    const name = eq === -1 ? arg : arg.slice(0, eq);
    const inlineValue = eq === -1 ? undefined : arg.slice(eq + 1);

    const def = FLAG_DEFS.find((d) => d.long === name || d.short === name);
    if (!def) {
      // A dash-prefixed argument is never a URL; reject it here instead of
      // passing it on as something to download.
      throw new Error(`Unknown option: ${name}`);
    }

    if (def.type === "boolean") {
      if (inlineValue !== undefined) {
        throw new Error(`Option ${name} does not take a value`);
      }
      target[def.key] = true;
      continue;
    }

    const value = inlineValue ?? args[i];
    if (value === undefined) {
      throw new Error(`Missing value for ${name}`);
    }
    if (inlineValue === undefined) {
      i++; // the value was taken from the following argument
    }

    if (def.type === "number") {
      const parsed = Number(value);
      // Number("") is 0 and Number("abc") is NaN; neither is a usable setting.
      if (value.trim() === "" || Number.isNaN(parsed)) {
        throw new Error(`Invalid number for ${name}: ${value}`);
      }
      target[def.key] = parsed;
    } else {
      target[def.key] = value;
    }
  }

  return options;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Write the usage banner and one line per entry of `FLAG_DEFS` to stdout.
 *
 * Each line shows the flag spelling(s), padded to a fixed width, followed by
 * the flag's description.
 */
export function printHelp(): void {
  // Width of a short-alias prefix such as "-f, ". Flags without an alias are
  // indented by the same amount so every long name starts in the same column.
  const aliasWidth = "-x, ".length;

  const rows = FLAG_DEFS.map((def) => {
    const flags = def.short ? `${def.short}, ${def.long}` : " ".repeat(aliasWidth) + def.long;
    return ` ${flags.padEnd(28)} ${def.description}`;
  });

  const lines = [
    "dlpx — Fast media downloader",
    "",
    "Usage: dlpx [OPTIONS] URL [URL...]",
    "",
    "Options:",
    ...rows,
  ];

  process.stdout.write(lines.join("\n") + "\n");
}
|