getraw 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/.gitattributes +4 -0
  2. package/CLAUDE.md +57 -0
  3. package/README.md +166 -0
  4. package/RESEARCH.md +109 -0
  5. package/STATUS.md +23 -0
  6. package/bun.lock +50 -0
  7. package/bunfig.toml +3 -0
  8. package/docs/plugin-guide.md +166 -0
  9. package/docs/supported-sites.md +41 -0
  10. package/package.json +30 -0
  11. package/src/cli/index.ts +52 -0
  12. package/src/cli/options.ts +97 -0
  13. package/src/core/format-sorter.ts +208 -0
  14. package/src/core/logger.ts +101 -0
  15. package/src/core/orchestrator.ts +140 -0
  16. package/src/core/output-template.ts +58 -0
  17. package/src/core/types.ts +237 -0
  18. package/src/downloaders/base.ts +25 -0
  19. package/src/downloaders/dash.ts +287 -0
  20. package/src/downloaders/fragment.ts +226 -0
  21. package/src/downloaders/hls.ts +170 -0
  22. package/src/downloaders/http.ts +260 -0
  23. package/src/extractors/archive-org.ts +126 -0
  24. package/src/extractors/bandcamp.ts +130 -0
  25. package/src/extractors/base.ts +29 -0
  26. package/src/extractors/bilibili/bangumi.ts +205 -0
  27. package/src/extractors/bilibili/index.ts +233 -0
  28. package/src/extractors/bilibili/wbi.ts +60 -0
  29. package/src/extractors/coub.ts +137 -0
  30. package/src/extractors/dailymotion.ts +99 -0
  31. package/src/extractors/dropbox.ts +52 -0
  32. package/src/extractors/generic.ts +118 -0
  33. package/src/extractors/google-drive.ts +106 -0
  34. package/src/extractors/imgur.ts +156 -0
  35. package/src/extractors/instagram/index.ts +263 -0
  36. package/src/extractors/instagram/reels.ts +166 -0
  37. package/src/extractors/kick/clips.ts +91 -0
  38. package/src/extractors/kick/index.ts +118 -0
  39. package/src/extractors/kick/live.ts +89 -0
  40. package/src/extractors/niconico/index.ts +209 -0
  41. package/src/extractors/odysee.ts +126 -0
  42. package/src/extractors/peertube.ts +143 -0
  43. package/src/extractors/reddit/gallery.ts +124 -0
  44. package/src/extractors/reddit/index.ts +203 -0
  45. package/src/extractors/rumble.ts +127 -0
  46. package/src/extractors/soundcloud/index.ts +161 -0
  47. package/src/extractors/soundcloud/playlist.ts +129 -0
  48. package/src/extractors/spotify.ts +97 -0
  49. package/src/extractors/streamable.ts +121 -0
  50. package/src/extractors/ted.ts +151 -0
  51. package/src/extractors/tiktok/index.ts +207 -0
  52. package/src/extractors/tiktok/user.ts +176 -0
  53. package/src/extractors/twitch/clips.ts +125 -0
  54. package/src/extractors/twitch/index.ts +136 -0
  55. package/src/extractors/twitch/live.ts +132 -0
  56. package/src/extractors/twitter/index.ts +140 -0
  57. package/src/extractors/twitter/spaces.ts +200 -0
  58. package/src/extractors/vimeo/index.ts +187 -0
  59. package/src/extractors/youtube/captions.ts +111 -0
  60. package/src/extractors/youtube/index.ts +252 -0
  61. package/src/extractors/youtube/innertube.ts +364 -0
  62. package/src/extractors/youtube/nsig.ts +105 -0
  63. package/src/extractors/youtube/playlist.ts +227 -0
  64. package/src/extractors/youtube/signature.ts +163 -0
  65. package/src/networking/client.ts +311 -0
  66. package/src/networking/cookies.ts +138 -0
  67. package/src/networking/proxy.ts +132 -0
  68. package/src/networking/tls.ts +67 -0
  69. package/src/networking/user-agents.ts +88 -0
  70. package/src/postprocessors/base.ts +44 -0
  71. package/src/postprocessors/extract-audio.ts +98 -0
  72. package/src/postprocessors/ffmpeg.ts +146 -0
  73. package/src/postprocessors/merge.ts +102 -0
  74. package/src/postprocessors/metadata.ts +73 -0
  75. package/src/postprocessors/sponsorblock.ts +162 -0
  76. package/src/postprocessors/subtitles.ts +285 -0
  77. package/src/postprocessors/thumbnails.ts +194 -0
  78. package/src/utils/sanitize.ts +36 -0
  79. package/src/utils/traverse.ts +68 -0
  80. package/tests/core/format-sorter.test.ts +96 -0
  81. package/tests/core/output-template.test.ts +56 -0
  82. package/tests/core/types.test.ts +79 -0
  83. package/tests/unit/downloaders/dash.test.ts +57 -0
  84. package/tests/unit/downloaders/hls.test.ts +120 -0
  85. package/tests/unit/downloaders/http.test.ts +114 -0
  86. package/tests/unit/extractors/bilibili.test.ts +83 -0
  87. package/tests/unit/extractors/instagram.test.ts +273 -0
  88. package/tests/unit/extractors/kick.test.ts +85 -0
  89. package/tests/unit/extractors/misc.test.ts +942 -0
  90. package/tests/unit/extractors/niconico.test.ts +61 -0
  91. package/tests/unit/extractors/reddit.test.ts +222 -0
  92. package/tests/unit/extractors/soundcloud.test.ts +299 -0
  93. package/tests/unit/extractors/tiktok.test.ts +260 -0
  94. package/tests/unit/extractors/twitch.test.ts +250 -0
  95. package/tests/unit/extractors/twitter.test.ts +181 -0
  96. package/tests/unit/extractors/vimeo.test.ts +253 -0
  97. package/tests/unit/extractors/youtube.test.ts +259 -0
  98. package/tests/unit/networking/client.test.ts +272 -0
  99. package/tests/unit/networking/cookies.test.ts +256 -0
  100. package/tests/unit/networking/proxy.test.ts +137 -0
  101. package/tests/unit/postprocessors/extract-audio.test.ts +63 -0
  102. package/tests/unit/postprocessors/merge.test.ts +61 -0
  103. package/tests/unit/postprocessors/subtitles.test.ts +89 -0
  104. package/tools/dashboard.ts +112 -0
  105. package/tsconfig.json +17 -0
package/.gitattributes ADDED
@@ -0,0 +1,4 @@
1
+ * text=auto
2
+ *.ts text eol=lf
3
+ *.json text eol=lf
4
+ *.md text eol=lf
package/CLAUDE.md ADDED
@@ -0,0 +1,57 @@
1
+ # dlpx
2
+
3
+ Fast media downloader CLI — yt-dlp replacement built natively in Bun/TypeScript.
4
+
5
+ ## Bun
6
+
7
+ Default to using Bun instead of Node.js.
8
+
9
+ - `bun <file>` instead of `node <file>`
10
+ - `bun test` instead of jest/vitest
11
+ - `bun build` instead of webpack/esbuild
12
+ - `bun install` instead of npm/yarn/pnpm install
13
+ - `bunx <package>` instead of `npx`
14
+ - Bun automatically loads .env, no dotenv needed
15
+
16
+ ### Bun APIs
17
+
18
+ - `Bun.file()` over `node:fs` readFile/writeFile
19
+ - `Bun.$` for shell commands (instead of execa)
20
+ - `bun:sqlite` for SQLite (not better-sqlite3)
21
+ - `bun:test` for testing
22
+ - `WebSocket` is built-in (not ws)
23
+
24
+ ## Git
25
+
26
+ - One-liner commit messages ONLY
27
+ - NO co-authored-by lines ever
28
+ - Conventional commits: `feat:`, `fix:`, `chore:`, `refactor:`, `test:`, `docs:`
29
+ - No multi-line commit bodies
30
+
31
+ ## Code Style
32
+
33
+ - TypeScript strict mode, no `any` types
34
+ - No unnecessary comments or docstrings
35
+ - No console.log in production code (use logger)
36
+ - Named imports only, no wildcard *
37
+
38
+ ## Architecture
39
+
40
+ - `src/cli/` — CLI entry point and options
41
+ - `src/core/` — orchestrator, types, format sorter
42
+ - `src/extractors/` — site-specific extractors (all implement BaseExtractor)
43
+ - `src/downloaders/` — protocol handlers (HTTP, HLS, DASH)
44
+ - `src/postprocessors/` — FFmpeg wrappers
45
+ - `src/networking/` — HTTP client, cookies, TLS
46
+ - `src/plugins/` — plugin loader and types (note: no `src/plugins/` files are present in this release's file list)
47
+ - `src/utils/` — shared helpers
48
+ - `tests/` — unit, integration, e2e tests
49
+ - `tools/` — dashboard and dev utilities
50
+
51
+ ## Source of Truth
52
+
53
+ 1. EXTRACTOR.md (in parent dir) — project plan, do not modify
54
+ 2. src/core/types.ts — shared interfaces, changes need QA approval
55
+ 3. STATUS.md — real-time progress
56
+ 4. package.json — deps and scripts
57
+ 5. Git main branch — merged truth
package/README.md ADDED
@@ -0,0 +1,166 @@
1
+ # dlpx
2
+
3
+ Fast media downloader CLI built natively in Bun/TypeScript.
4
+
5
+ ## Installation
6
+
7
+ ### Global install (Bun required)
8
+
9
+ ```sh
10
+ bun install -g dlpx
11
+ ```
12
+
13
+ ### From source
14
+
15
+ ```sh
16
+ git clone https://github.com/web3mikee/dlpx
17
+ cd dlpx
18
+ bun install
19
+ ```
20
+
21
+ Run directly from source:
22
+
23
+ ```sh
24
+ bun run src/cli/index.ts <URL>
25
+ ```
26
+
27
+ Build a standalone binary:
28
+
29
+ ```sh
30
+ bun run build
31
+ ./dlpx <URL>
32
+ ```
33
+
34
+ ## Quick Start
35
+
36
+ Download a video at best quality:
37
+
38
+ ```sh
39
+ dlpx https://www.youtube.com/watch?v=dQw4w9WgXcQ
40
+ ```
41
+
42
+ Extract audio as MP3:
43
+
44
+ ```sh
45
+ dlpx -x --audio-format mp3 https://soundcloud.com/artist/track
46
+ ```
47
+
48
+ List all available formats before downloading:
49
+
50
+ ```sh
51
+ dlpx -F https://vimeo.com/123456789
52
+ ```
53
+
54
+ Download a specific format and write subtitles:
55
+
56
+ ```sh
57
+ dlpx -f "bestvideo[height<=1080]+bestaudio" --write-subs --sub-langs en https://www.youtube.com/watch?v=dQw4w9WgXcQ
58
+ ```
59
+
60
+ Dump extracted metadata as JSON without downloading:
61
+
62
+ ```sh
63
+ dlpx -j https://www.reddit.com/r/videos/comments/abc123/some_post/
64
+ ```
65
+
66
+ ## CLI Reference
67
+
68
+ ```
69
+ Usage: dlpx [OPTIONS] URL [URL...]
70
+ ```
71
+
72
+ | Flag | Short | Type | Default | Description |
73
+ |------|-------|------|---------|-------------|
74
+ | `--format` | `-f` | string | `bv*+ba/b` | Format selection string |
75
+ | `--output` | `-o` | string | `%(title)s [%(id)s].%(ext)s` | Output filename template |
76
+ | `--extract-audio` | `-x` | boolean | false | Extract audio only |
77
+ | `--audio-format` | | string | `mp3` | Audio format (`mp3`, `aac`, `flac`, etc.) |
78
+ | `--audio-quality` | | string | `5` | Audio quality (0–10 or bitrate) |
79
+ | `--write-subs` | | boolean | false | Write subtitles to file |
80
+ | `--sub-langs` | | string | `en` | Subtitle languages |
81
+ | `--list-formats` | `-F` | boolean | false | List available formats |
82
+ | `--dump-json` | `-j` | boolean | false | Dump info JSON to stdout |
83
+ | `--quiet` | `-q` | boolean | false | Suppress output |
84
+ | `--verbose` | `-v` | boolean | false | Verbose output |
85
+ | `--no-progress` | | boolean | false | Disable progress bar |
86
+ | `--retries` | `-R` | number | `3` | Number of retries |
87
+ | `--rate-limit` | `-r` | number | none | Rate limit in bytes/sec |
88
+ | `--proxy` | | string | none | Proxy URL |
89
+ | `--cookies` | | string | none | Cookie file path |
90
+ | `--user-agent` | | string | `dlpx/0.0.0` | Custom User-Agent |
91
+ | `--referer` | | string | none | Custom Referer header |
92
+ | `--embed-thumbnail` | | boolean | false | Embed thumbnail in output file |
93
+ | `--embed-subs` | | boolean | false | Embed subtitles in output file |
94
+ | `--merge-output-format` | | string | none | Output container for merging streams |
95
+ | `--ffmpeg-location` | | string | none | Path to ffmpeg binary |
96
+ | `--version` | `-V` | boolean | false | Print version |
97
+ | `--help` | `-h` | boolean | false | Show help |
98
+
99
+ ## Supported Sites
100
+
101
+ | Site | Extractor name | URL pattern | Subtitles |
102
+ |------|---------------|-------------|-----------|
103
+ | YouTube | `youtube` | `youtube.com/watch`, `youtu.be/`, `youtube.com/shorts/`, `youtube.com/live/`, `youtube.com/playlist`, `youtube.com/channel/`, `youtube.com/@handle` | Yes (manual + auto-generated) |
104
+ | Vimeo | `vimeo` | `vimeo.com/<id>`, `player.vimeo.com/video/<id>`, channels, groups | No |
105
+ | Twitter / X | `twitter` | `twitter.com/*/status/*`, `x.com/*/status/*` | No |
106
+ | Twitter Spaces | `twitter:spaces` | `twitter.com/i/spaces/*`, `x.com/i/spaces/*` | No |
107
+ | TikTok | `tiktok` | `tiktok.com/@user/video/<id>`, `vm.tiktok.com/*` | No |
108
+ | TikTok User | `tiktok:user` | `tiktok.com/@username` | No |
109
+ | Instagram | `instagram` | `instagram.com/p/*`, `instagram.com/reel/*`, `instagram.com/reels/*` | No |
110
+ | Instagram Reels feed | `instagram:reels` | `instagram.com/reels/` | No |
111
+ | Twitch VOD | `twitch:vod` | `twitch.tv/videos/<id>` | No |
112
+ | Twitch Clip | `twitch:clip` | `twitch.tv/*/clip/*`, `clips.twitch.tv/*` | No |
113
+ | Twitch Live | `twitch:live` | `twitch.tv/<channel>` | No |
114
+ | Kick VOD | `kick` | `kick.com/video/<id>` | No |
115
+ | Kick Clip | `kick:clips` | `kick.com/<channel>/clips/<id>` | No |
116
+ | Kick Live | `kick:live` | `kick.com/<channel>` | No |
117
+ | Reddit | `reddit` | `reddit.com/r/*/comments/*`, `v.redd.it/*` | No |
118
+ | Reddit Gallery | `reddit:gallery` | `reddit.com/r/*/comments/*`, `reddit.com/gallery/*` | No |
119
+ | SoundCloud | `soundcloud` | `soundcloud.com/<user>/<track>` | No |
120
+ | SoundCloud Playlist | `soundcloud:playlist` | `soundcloud.com/<user>/sets/<playlist>` | No |
121
+ | Bilibili | `bilibili` | `bilibili.com/video/BV*`, `bilibili.com/video/av*` | No |
122
+ | Bilibili Bangumi | `bilibili:bangumi` | `bilibili.com/bangumi/play/ep*`, `bilibili.com/bangumi/play/ss*` | No |
123
+ | Niconico | `niconico` | `nicovideo.jp/watch/sm*`, `nicovideo.jp/watch/nm*` | No |
124
+ | Bandcamp | `bandcamp` | `*.bandcamp.com/track/*`, `*.bandcamp.com/album/*` | No |
125
+ | Dailymotion | `dailymotion` | `dailymotion.com/video/<id>` | No |
126
+ | Streamable | `streamable` | `streamable.com/<id>` | No |
127
+ | Coub | `coub` | `coub.com/view/*`, `coub.com/embed/*` | No |
128
+ | Imgur | `imgur` | `imgur.com/<id>`, `imgur.com/a/<id>`, `imgur.com/gallery/<id>`, `i.imgur.com/*` | No |
129
+ | Rumble | `rumble` | `rumble.com/v*.html`, `rumble.com/embed/*` | No |
130
+ | Odysee | `odysee` | `odysee.com/@*:*/<slug>`, `lbry.tv/@*:*/<slug>` | No |
131
+ | TED | `ted` | `ted.com/talks/<slug>` | Yes |
132
+ | PeerTube | `peertube` | Any PeerTube instance: `<host>/videos/watch/*`, `<host>/w/*`, `<host>/videos/embed/*` | Yes |
133
+ | Google Drive | `google-drive` | `drive.google.com/file/d/*`, `docs.google.com/file/d/*` | No |
134
+ | Dropbox | `dropbox` | `dropbox.com/s/*`, `dropbox.com/sh/*`, `dropbox.com/scl/fo/*` | No |
135
+ | Archive.org | `archive.org` | `archive.org/details/*`, `archive.org/download/*` | No |
136
+ | Spotify | `spotify` | `open.spotify.com/episode/<id>` | No |
137
+ | Generic | `generic` | Any `http://` or `https://` URL (fallback) | No |
138
+
139
+ > Spotify: without authentication, only a 30-second audio preview of each episode is available. Downloading full episode audio requires Spotify authentication, which is not currently implemented.
140
+
141
+ See [docs/supported-sites.md](docs/supported-sites.md) for full format and URL pattern details.
142
+
143
+ ## Building from Source
144
+
145
+ Requires [Bun](https://bun.sh) v1.0 or later.
146
+
147
+ ```sh
148
+ git clone https://github.com/web3mikee/dlpx
149
+ cd dlpx
150
+ bun install
151
+ bun run build # produces ./dlpx binary
152
+ ```
153
+
154
+ Run tests:
155
+
156
+ ```sh
157
+ bun test
158
+ ```
159
+
160
+ ## Writing a Custom Extractor
161
+
162
+ See [docs/plugin-guide.md](docs/plugin-guide.md) for the `BaseExtractor` interface and a minimal example.
163
+
164
+ ## License
165
+
166
+ MIT