cloud-ytdl 1.0.0-rc

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,106 @@
1
+ 'use strict'
2
+
3
+ const { request } = require('undici')
4
+ const { loadCookieHeader } = require('./load')
5
+ const xmlToSrt = require('./xmlToSrt')
6
+
7
/**
 * Fetch the YouTube home page and pull the InnerTube API key and the web
 * client version out of its HTML.
 *
 * @returns {Promise<{apiKey: string, clientVersion: string}>}
 * @throws {Error} 'Innertube config not found' when either value is missing.
 */
async function getInnertubeConfig() {
  const response = await request('https://www.youtube.com', {
    headers: { 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)' }
  })
  const page = await response.body.text()

  // First capture group of `pattern` in the page, or undefined when absent.
  const capture = pattern => {
    const found = page.match(pattern)
    return found ? found[1] : undefined
  }

  const apiKey = capture(/"INNERTUBE_API_KEY":"([^"]+)"/)
  const clientVersion = capture(/"clientVersion":"([^"]+)"/)

  if (apiKey && clientVersion) return { apiKey, clientVersion }
  throw Error('Innertube config not found')
}
28
+
29
/**
 * POST to the InnerTube `player` endpoint as the WEB client and return the
 * parsed JSON response.
 *
 * @param {string} videoId - id sent in the request payload.
 * @param {string} [cookie] - value for the `cookie` request header; omitted when falsy.
 * @returns {Promise<object>} whatever `res.body.json()` resolves to.
 */
async function getPlayer(videoId, cookie) {
  const config = await getInnertubeConfig()
  const endpoint = `https://www.youtube.com/youtubei/v1/player?key=${config.apiKey}`

  const headers = {
    'content-type': 'application/json',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
  }
  if (cookie) headers.cookie = cookie

  const payload = {
    videoId,
    context: {
      client: { clientName: 'WEB', clientVersion: config.clientVersion }
    }
  }

  const res = await request(endpoint, {
    method: 'POST',
    headers,
    body: JSON.stringify(payload)
  })

  return res.body.json()
}
60
+
61
/**
 * Download the raw caption document for a caption track.
 *
 * @param {{baseUrl: string}} track - track whose `baseUrl` is requested.
 * @param {string} [cookie] - value for the `cookie` request header; omitted when falsy.
 * @returns {Promise<string>} the response body as text.
 */
async function downloadSubtitle(track, cookie) {
  const headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    accept: 'application/xml',
    ...(cookie ? { cookie } : {})
  }

  const { body } = await request(track.baseUrl, { headers })
  return body.text()
}
73
+
74
/**
 * Fetch the subtitles of a video in one language.
 *
 * A track without a `kind` is preferred; a track whose `kind` is 'asr' is the
 * fallback.
 *
 * @param {string} videoId
 * @param {object} [opts]
 * @param {*} [opts.cookie] - passed to `loadCookieHeader` when truthy.
 * @param {string} [opts.lang='id'] - caption language code.
 * @param {string} [opts.format='xml'] - 'srt' converts the document with `xmlToSrt`.
 * @returns {Promise<string|null>} the subtitle text, or null when no caption
 *   list, no matching track, or no `<transcript` marker is found.
 */
async function getSubtitles(videoId, opts = {}) {
  let cookie = null
  if (opts.cookie) cookie = loadCookieHeader(opts.cookie)

  const lang = opts.lang || 'id'
  const format = opts.format || 'xml'

  const player = await getPlayer(videoId, cookie)
  const renderer = player?.captions?.playerCaptionsTracklistRenderer
  const tracks = renderer?.captionTracks
  if (!tracks) return null

  // First track in the requested language that also satisfies `accepts`.
  const pick = accepts =>
    tracks.find(t => t.languageCode === lang && accepts(t))

  const track = pick(t => !t.kind) || pick(t => t.kind === 'asr')
  if (!track) return null

  const xml = await downloadSubtitle(track, cookie)
  if (!xml.includes('<transcript')) return null

  return format === 'srt' ? xmlToSrt(xml) : xml
}
105
+
106
+ module.exports = { getSubtitles }
@@ -0,0 +1,58 @@
1
// Hosts on which the video id may be supplied through the `v` query parameter.
const validQueryDomains = new Set(
  ['', 'www.', 'm.', 'music.', 'gaming.'].map(sub => `${sub}youtube.com`)
)

// Links that carry the id in the path: youtu.be/<id> and
// (www.)youtube.com/{embed,v,shorts,live}/<id>.
const validPathDomains = /^https?:\/\/(youtu\.be\/|(www\.)?youtube\.com\/(embed|v|shorts|live)\/)/

// Anything that starts with an http(s) scheme is handled as a URL.
const urlRegex = /^https?:\/\//

// Exactly 11 characters out of letters, digits, '-' and '_'.
const idRegex = /^[a-zA-Z0-9-_]{11}$/
12
+
13
/**
 * Tell whether a string, ignoring surrounding whitespace, matches `idRegex`.
 *
 * @param {string} id
 * @returns {boolean}
 */
const validateID = id => {
  const candidate = id.trim()
  return idRegex.test(candidate)
}
14
+
15
/**
 * Extract the 11-character video id from a YouTube URL.
 *
 * The id is taken from the `v` query parameter when present; otherwise from
 * the path of short links (youtu.be/<id>) and of embed/v/shorts/live links.
 *
 * Path-style links are recognised from the parsed URL in addition to the
 * `validPathDomains` regex, so that links on every host in
 * `validQueryDomains` (e.g. m.youtube.com/shorts/<id>) and links whose host
 * is written in upper case are accepted too. Everything the regex alone
 * accepted is still accepted.
 *
 * @param {string} link - URL to inspect; surrounding whitespace is ignored.
 * @returns {string} the video id.
 * @throws {TypeError} when `link` is not a parsable URL, or when the id does
 *   not match `idRegex`.
 * @throws {Error} 'Not a YouTube domain' or 'No video id found: "<link>"'.
 */
const getURLVideoID = link => {
  const trimmed = link.trim()
  const parsed = new URL(trimmed)
  // `hostname` is lower-cased by the URL parser and never contains a port.
  const host = parsed.hostname
  const segments = parsed.pathname.split('/')
  let id = parsed.searchParams.get('v')

  const isHttp = parsed.protocol === 'http:' || parsed.protocol === 'https:'
  const isShortLink = isHttp && host === 'youtu.be'
  const isPathLink =
    validPathDomains.test(trimmed) ||
    isShortLink ||
    (isHttp &&
      validQueryDomains.has(host) &&
      ['embed', 'v', 'shorts', 'live'].includes(segments[1]))

  if (isPathLink && !id) {
    id = host === 'youtu.be' ? segments[1] : segments[2]
  } else if (host && !validQueryDomains.has(host)) {
    throw Error('Not a YouTube domain')
  }

  if (!id) throw Error(`No video id found: "${link}"`)

  // Anything after the first 11 characters is dropped before validation.
  id = id.slice(0, 11)

  if (!validateID(id)) {
    throw TypeError(`Video id (${id}) does not match expected format (${idRegex})`)
  }

  return id
}
36
+
37
/**
 * Resolve a video id from either a bare id or a URL.
 *
 * @param {string} str - an id, or a string starting with http:// or https://.
 * @returns {string} the trimmed id, or the result of `getURLVideoID`.
 * @throws {Error} 'No video id found: <str>' when the input is neither.
 */
const getVideoID = str => {
  const candidate = str.trim()
  const isBareId = validateID(candidate)

  if (!isBareId && !urlRegex.test(candidate)) {
    throw Error(`No video id found: ${str}`)
  }

  return isBareId ? candidate : getURLVideoID(candidate)
}
43
+
44
/**
 * Tell whether `getURLVideoID` can extract an id from the given string.
 *
 * @param {string} str
 * @returns {boolean} false when `getURLVideoID` throws, true otherwise.
 */
const validateURL = str => {
  let extractable = true
  try {
    getURLVideoID(str)
  } catch {
    extractable = false
  }
  return extractable
}
52
+
53
+ module.exports = {
54
+ getVideoID,
55
+ getURLVideoID,
56
+ validateID,
57
+ validateURL
58
+ }
package/lib/utils.js ADDED
@@ -0,0 +1,294 @@
1
+ const { request: undiciRequest } = require('undici')
2
+ const { writeFileSync } = require('fs')
3
+ const AGENT = require('./agents.js')
4
+ const zlib = require('zlib')
5
+
6
/**
 * Return the text found between a left marker and the next right marker.
 *
 * @param {string} haystack - text to search.
 * @param {string|RegExp} left - marker that precedes the wanted text.
 * @param {string} right - marker that follows the wanted text.
 * @returns {string} the enclosed text, or '' when either marker is missing.
 */
const between = (haystack, left, right) => {
  let start
  if (left instanceof RegExp) {
    const found = haystack.match(left)
    if (!found) return ''
    start = found.index + found[0].length
  } else {
    const at = haystack.indexOf(left)
    if (at === -1) return ''
    start = at + left.length
  }

  const tail = haystack.slice(start)
  const end = tail.indexOf(right)
  return end === -1 ? '' : tail.slice(0, end)
}
22
+
23
/**
 * Convert a colon-separated duration ("H:MM:SS", "M:SS", "SS") to seconds.
 *
 * @param {string} text
 * @returns {number|null} total seconds, or null for an empty/missing input.
 */
function parseTime(text) {
  if (!text) return null

  // Each further component shifts the running total by one base-60 digit.
  return text
    .split(':')
    .map(Number)
    .reduce((total, part) => total * 60 + part, 0)
}
35
/**
 * Cut the text between two markers out of `body` and parse it as JSON.
 *
 * A first attempt strips a trailing ',' or ';', appends one '}' when the text
 * ends in exactly two closing braces, wraps it in `prepend`/`append` and
 * drops commas that directly precede '}' or ']'. When that attempt throws, a
 * second one decodes a few HTML entities and backslash escapes and parses the
 * span from the first '{' to the last '}'.
 *
 * @param {string} body
 * @param {string|RegExp} left
 * @param {string} right
 * @param {string} [prepend='']
 * @param {string} [append='']
 * @returns {*} the parsed value, or null when nothing could be parsed.
 */
const tryParseBetween = (body, left, right, prepend = '', append = '') => {
  try {
    const raw = between(body, left, right)
    if (!raw) return null

    let payload = raw.trim().replace(/[,;]$/, '')
    const endsWithTwoBraces = payload.endsWith('}}') && !payload.endsWith('}}}')
    if (endsWithTwoBraces) payload += '}'

    const candidate = `${prepend}${payload}${append}`
      .replace(/,\s*}/g, '}')
      .replace(/,\s*]/g, ']')
    return JSON.parse(candidate)
  } catch {
    // The strict attempt failed; continue with the lenient one below.
  }

  try {
    const raw = between(body, left, right)
    if (!raw) return null

    const decoded = raw
      .replace(/&quot;/g, '"')
      .replace(/&amp;/g, '&')
      .replace(/&#39;/g, "'")
      .replace(/\\"/g, '"')
      .replace(/\\[nt]/g, '')

    const first = decoded.indexOf('{')
    const last = decoded.lastIndexOf('}')
    if (first === -1 || last === -1) return null
    return JSON.parse(decoded.slice(first, last + 1))
  } catch {
    return null
  }
}
62
+
63
/**
 * Find an object literal assigned to `name` inside `body` and parse it.
 *
 * Four patterns are tried in order. A match is repaired before parsing:
 * missing closing braces are appended, commas before '}' or ']' are removed,
 * bare keys are quoted and single-quoted values are re-quoted. A match that
 * still fails to parse is skipped in favour of the next pattern.
 *
 * @param {string} body - text to search.
 * @param {string} name - variable/property name; it is inserted into the
 *   regular expressions verbatim.
 * @returns {*} the parsed value, or null when no pattern yields valid JSON.
 */
const extractYouTubeJSON = (body, name) => {
  const patterns = [
    `var ${name}\\s*=\\s*({.+?});`,
    `"${name}"\\s*:\\s*({.+?})(?:,|$)`,
    `${name}\\s*[":=]\\s*({.+?})(?:[,;}]|$)`,
    `["']${name}["']\\s*:\\s*({.+?})(?:,|$)`
  ].map(source => new RegExp(source, 'i'))

  const countOf = (text, ch) => text.split(ch).length - 1

  for (const pattern of patterns) {
    const hit = body.match(pattern)
    if (hit) {
      try {
        const captured = hit[1]
        const missing = countOf(captured, '{') - countOf(captured, '}')
        const balanced = missing > 0 ? captured + '}'.repeat(missing) : captured
        const repaired = balanced
          .replace(/,\s*}/g, '}')
          .replace(/,\s*]/g, ']')
          .replace(/([{,]\s*)(\w+):/g, '$1"$2":')
          .replace(/:\s*'([^']*)'/g, ':"$1"')
        return JSON.parse(repaired)
      } catch {
        // Not parsable even after repair; try the next pattern.
      }
    }
  }
  return null
}
89
+
90
/**
 * Parse a number that may carry a 'K' (thousand) or 'M' (million) suffix.
 *
 * The first ',' is read as a decimal point and the first space is removed
 * before the leading run of digits and dots is parsed.
 *
 * @param {string} s
 * @returns {number|null} the rounded value, or null when no digits are found.
 */
const parseAbbreviatedNumber = s => {
  const cleaned = s.replace(',', '.').replace(' ', '')
  const parts = /([\d.]+)([MK]?)/.exec(cleaned)
  if (parts === null) return null

  const [, digits, suffix] = parts
  const value = parseFloat(digits)
  if (suffix === 'M') return Math.round(value * 1e6)
  if (suffix === 'K') return Math.round(value * 1e3)
  return Math.round(value)
}
96
+
97
// Delimiters inside which brackets must not be counted. A '/' only opens a
// regular-expression literal when the text right before it ends with one of
// the characters listed in `startPrefix` (optionally followed by one space).
const ESC = [
  { start: '"', end: '"' },
  { start: "'", end: "'" },
  { start: '`', end: '`' },
  { start: '/', end: '/', startPrefix: /(^|[[{:;,/])\s?$/ }
]

/**
 * Cut the leading object or array literal off a string that continues with
 * arbitrary JavaScript.
 *
 * Brackets are counted while skipping string, template and regex literals
 * and honouring backslash escapes; the text up to the matching closing
 * bracket is returned.
 *
 * @param {string} src - text that starts with '{' or '['.
 * @returns {string} the balanced literal at the start of `src`.
 * @throws {Error} when `src` starts with another character, or when the
 *   literal is never closed.
 */
const cutAfterJS = src => {
  const open = src[0] === '[' ? '[' : src[0] === '{' ? '{' : null
  const close = open === '[' ? ']' : '}'
  if (!open) throw Error(`Can't cut JSON starting with ${src[0]}`)

  let esc = null // delimiter currently open, if any
  let slash = false // true when the previous character was an unescaped backslash
  let depth = 0

  for (let i = 0; i < src.length; i++) {
    if (!slash && esc && src[i] === esc.end) {
      esc = null
      continue
    }
    if (!slash && !esc) {
      for (const e of ESC) {
        if (src[i] !== e.start) continue
        // Clamp the window start: a negative index would make slice() count
        // from the end of the string and hand the prefix test an empty string,
        // which it accepts, turning every early '/' into a regex literal.
        const before = src.slice(Math.max(0, i - 10), i)
        if (!e.startPrefix || e.startPrefix.test(before)) {
          esc = e
          break
        }
      }
      if (esc) continue
    }
    slash = src[i] === '\\' && !slash
    if (esc) continue
    if (src[i] === open) depth++
    if (src[i] === close) depth--
    if (depth === 0) return src.slice(0, i + 1)
  }
  throw Error('Unclosed JSON')
}
135
+
136
/** Error type returned by `playError` for playability failures. */
class UnrecoverableError extends Error {}

/**
 * Turn a failing `playabilityStatus` of a player response into an error.
 *
 * @param {object} [r] - player response.
 * @returns {UnrecoverableError|null} an error carrying the reason (or the
 *   first message) when the status is ERROR, LOGIN_REQUIRED,
 *   LIVE_STREAM_OFFLINE or UNPLAYABLE; null otherwise.
 */
const playError = r => {
  const playability = r?.playabilityStatus
  const failing = ['ERROR', 'LOGIN_REQUIRED', 'LIVE_STREAM_OFFLINE', 'UNPLAYABLE']

  if (playability && failing.includes(playability.status)) {
    return new UnrecoverableError(playability.reason || playability.messages?.[0])
  }
  return null
}
146
+
147
/**
 * Perform a request through a caller-supplied fetch function and reshape the
 * result into `{ statusCode, headers, body }`.
 *
 * @param {Function} fetch - called as `fetch(url, opts)`.
 * @param {string} url - request URL; entries of `opts.query` are appended to it.
 * @param {object} [opts] - passed to `fetch` unchanged.
 * @returns {Promise<{statusCode: number, headers: object, body: object}>}
 *   `body` is the response object itself, with the enumerable own properties
 *   of its `body` (if any) copied onto it.
 */
const useFetch = async (fetch, url, opts) => {
  let target = url
  if (opts?.query) {
    const withQuery = new URL(url)
    for (const key in opts.query) {
      withQuery.searchParams.append(key, opts.query[key])
    }
    target = withQuery.toString()
  }

  const response = await fetch(target, opts)
  const statusCode = response.status
  const headers = Object.fromEntries(response.headers.entries())
  const body = Object.assign(response, response.body || {})
  return { statusCode, headers, body }
}
160
+
161
/**
 * Perform an HTTP request and return the decoded response body.
 *
 * The request goes through `options.fetch` when given, through undici
 * otherwise. A 2xx response resolves to parsed JSON when the content type
 * mentions "json" and to text otherwise; a 3xx response is followed; any
 * other status rejects.
 *
 * The body is read exactly once. A body declared as gzip or deflate is read
 * as bytes and decompressed; if decompression fails (for instance because
 * the transport already decoded it) the bytes are returned as text.
 *
 * @param {string} url
 * @param {object} [options]
 * @param {object} [options.requestOptions] - passed to the transport; an
 *   'Accept-Encoding: identity' header is added when none is set.
 * @param {Function} [options.rewriteRequest] - called with (url, requestOptions);
 *   may return a replacement `url` and/or `requestOptions`.
 * @param {Function} [options.fetch] - fetch implementation to use instead of undici.
 * @param {number} [redirectCount=0] - redirects followed so far; used internally.
 * @returns {Promise<*>} parsed JSON or response text.
 * @throws {Error} 'Status code: <code>' with a `statusCode` property for
 *   non-2xx/3xx responses, for a 3xx without a Location header, and once more
 *   than 10 redirects have been followed.
 */
const request = async (url, options = {}, redirectCount = 0) => {
  const MAX_REDIRECTS = 10
  let { requestOptions = {}, rewriteRequest, fetch } = options

  if (rewriteRequest) {
    const r = rewriteRequest(url, requestOptions)
    url = r.url || url
    requestOptions = r.requestOptions || requestOptions
  }

  requestOptions.headers ||= {}
  requestOptions.headers['Accept-Encoding'] ||= 'identity'

  const res = fetch
    ? await useFetch(fetch, url, requestOptions)
    : await undiciRequest(url, requestOptions)

  const code = String(res.statusCode)
  const statusError = () => {
    const e = Error(`Status code: ${code}`)
    e.statusCode = res.statusCode
    return e
  }

  if (code.startsWith('2')) {
    if (res.headers['content-type']?.includes('json')) return res.body.json()

    const encoding = String(res.headers['content-encoding'] || '').trim().toLowerCase()
    const compressed = encoding === 'gzip' || encoding === 'deflate'
    if (!compressed || !res.body.arrayBuffer) return res.body.text()

    // A body can only be consumed once, so take the bytes and decode from them.
    const raw = Buffer.from(await res.body.arrayBuffer())
    try {
      const inflated = encoding === 'gzip' ? zlib.gunzipSync(raw) : zlib.inflateSync(raw)
      return inflated.toString()
    } catch {
      // Not actually compressed: fall back to the bytes as they are.
      return raw.toString()
    }
  }

  if (code.startsWith('3')) {
    const { location } = res.headers
    if (!location || redirectCount >= MAX_REDIRECTS) throw statusError()
    // Location may be relative; resolve it against the URL just requested.
    return request(new URL(location, url).toString(), options, redirectCount + 1)
  }

  throw statusError()
}
195
+
196
/**
 * Define `prop` on `obj` as a getter-only property that yields `val`.
 *
 * @param {object} obj
 * @param {string} prop
 * @param {*} val
 * @returns {object} `obj`, as returned by `Object.defineProperty`.
 */
const deprecate = (obj, prop, val) => {
  const descriptor = {
    get() {
      return val
    }
  }
  return Object.defineProperty(obj, prop, descriptor)
}
198
+
199
/**
 * Produce a random IPv6 address inside a CIDR block.
 *
 * The first `mask` bits are copied from the block's address; the remaining
 * bits are random.
 *
 * @param {string} ip - block in "address/prefix-length" form, prefix 1..128.
 * @returns {string} the address as eight colon-separated 4-digit hex groups.
 * @throws {Error} 'Invalid IPv6' when the address is malformed or the prefix
 *   length is missing or out of range.
 */
const getRandomIPv6 = ip => {
  const [addr, maskRaw] = ip.split('/')
  // A missing prefix yields NaN here, which no range comparison rejects, so
  // the integer check is what stops a block-less input.
  const mask = Number(maskRaw)
  if (!isIPv6(ip) || !Number.isInteger(mask) || mask < 1 || mask > 128) {
    throw Error('Invalid IPv6')
  }

  const base = normalizeIP(addr)
  const fixedGroups = Math.floor(mask / 16) // groups copied unchanged
  const fixedBits = mask % 16 // fixed high bits of the group that follows

  const groups = base.map((group, i) => {
    if (i < fixedGroups) return group
    if (i === fixedGroups && fixedBits) {
      const freeBits = 16 - fixedBits
      const keep = 0xffff << freeBits
      return (group & keep) | Math.floor(Math.random() * (1 << freeBits))
    }
    return Math.floor(Math.random() * 0x10000)
  })

  return groups.map(v => v.toString(16).padStart(4, '0')).join(':')
}
216
+
217
/**
 * Loose syntactic check for an IPv6 address with an optional "/prefix".
 *
 * @param {string} ip
 * @returns {boolean}
 */
const isIPv6 = ip => {
  const shape = /^(?:[0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}(?:\/\d{1,3})?$/
  return shape.test(ip)
}
219
+
220
/**
 * Expand an IPv6 address into its eight 16-bit groups.
 *
 * @param {string} ip - address without prefix length; may contain one "::".
 * @returns {number[]} eight numeric groups; the gap is filled with zeros.
 */
const normalizeIP = ip => {
  const [head, tail] = ip.split('::')
  const leading = head ? head.split(':') : []
  const trailing = tail ? tail.split(':') : []
  const gap = new Array(8 - leading.length - trailing.length).fill('0')

  return leading
    .concat(gap, trailing)
    .map(group => parseInt(group || '0', 16))
}
227
+
228
/**
 * Write `body` to a timestamped debug file.
 *
 * The file is created in the directory named by YTDL_DEBUG_PATH (default
 * '.'). Nothing is written when YTDL_NO_DEBUG_FILE is set.
 *
 * @param {string} name - suffix of the file name.
 * @param {string|Buffer} body - content to write.
 * @returns {*} the generated file name, or `body` itself when writing is disabled.
 */
const saveDebugFile = (name, body) => {
  const { YTDL_NO_DEBUG_FILE: disabled } = process.env
  if (disabled) return body

  const dir = process.env.YTDL_DEBUG_PATH || '.'
  const file = `${Date.now()}-${name}`
  writeFileSync(`${dir}/${file}`, body)
  return file
}
235
+
236
/**
 * Find the first own enumerable key of `o` that equals `p` ignoring case.
 *
 * @param {object} o
 * @param {string} p
 * @returns {string|undefined} the key as spelled on `o`, if any.
 */
const findKeyI = (o, p) => {
  for (const key of Object.keys(o)) {
    if (key.toLowerCase() === p.toLowerCase()) return key
  }
  return undefined
}

/**
 * Read a property by case-insensitive name.
 *
 * @returns {*} the value, or undefined when no key matches.
 */
const getPropInsensitive = (o, p) => {
  const key = findKeyI(o, p)
  return o[key]
}

/**
 * Write a property by case-insensitive name, reusing an existing key's
 * spelling when one matches and `p` itself otherwise.
 *
 * @returns {*} the assigned value.
 */
const setPropInsensitive = (o, p, v) => {
  const key = findKeyI(o, p) || p
  o[key] = v
  return v
}
239
+
240
/**
 * Install the module's default agent on `o` unless an agent is already set.
 *
 * When the request headers carry a cookie header, the default agent's jar is
 * emptied and refilled from that header first.
 *
 * @param {object} o - options object; `o.requestOptions.headers` is read and
 *   `o.agent` is written.
 * @returns {void}
 */
const applyDefaultAgent = o => {
  if (!o.agent) {
    const jar = AGENT.defaultAgent.jar
    const cookieHeader = getPropInsensitive(o.requestOptions.headers, 'cookie')

    if (cookieHeader) {
      jar.removeAllCookiesSync()
      AGENT.addCookiesFromString(jar, cookieHeader)
    }

    o.agent = AGENT.defaultAgent
  }
}
250
+
251
/**
 * When an IPv6 block is configured, bind the request to a random address
 * drawn from it.
 *
 * @param {object} o - options object; reads `o.IPv6Block`, writes
 *   `o.requestOptions.localAddress`.
 * @returns {void}
 */
const applyIPv6Rotations = o => {
  if (!o.IPv6Block) return
  o.requestOptions.localAddress = getRandomIPv6(o.IPv6Block)
}
256
+
257
/**
 * Replace `o.requestOptions` with a shallow copy whose headers default
 * 'Accept-Encoding' to 'identity'. Headers already present win.
 *
 * @param {object} o - options object; `o.requestOptions` is reassigned.
 * @returns {void}
 */
const applyDefaultHeaders = o => {
  const requestOptions = { ...o.requestOptions }
  const defaults = { 'Accept-Encoding': 'identity' }
  requestOptions.headers = { ...defaults, ...requestOptions.headers }
  o.requestOptions = requestOptions
}
261
+
262
/**
 * Build a random string over the 64-character alphabet A-Z, a-z, 0-9, '-', '_'.
 *
 * @param {number} l - wanted length. Fractions are rounded down; negative,
 *   missing or non-numeric values produce an empty string.
 * @returns {string}
 */
const generateClientPlaybackNonce = l => {
  const c = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
  // Counting down with `while (l--)` never reaches zero for negative or
  // fractional lengths, so the length is normalised and counted up instead.
  const wanted = Number(l)
  const length = Number.isFinite(wanted) ? Math.max(0, Math.floor(wanted)) : 0

  let r = ''
  for (let i = 0; i < length; i++) {
    r += c[Math.floor(Math.random() * c.length)]
  }
  return r
}
268
+
269
/**
 * Fill in the default player client list when none (or an empty one) is set.
 *
 * @param {object} o - options object; `o.playerClients` may be written.
 * @returns {void}
 */
const applyPlayerClients = o => {
  const configured = o.playerClients
  if (configured && configured.length) return
  o.playerClients = ['WEB_EMBEDDED', 'IOS', 'ANDROID', 'TV']
}
272
/**
 * No-op: accepts any arguments, does nothing and returns undefined.
 *
 * @returns {undefined}
 */
const applyOldLocalAddress = () => undefined
273
+
274
+ module.exports = {
275
+ between,
276
+ tryParseBetween,
277
+ extractYouTubeJSON,
278
+ applyOldLocalAddress,
279
+ parseAbbreviatedNumber,
280
+ parseTime,
281
+ cutAfterJS,
282
+ UnrecoverableError,
283
+ playError,
284
+ request,
285
+ deprecate,
286
+ saveDebugFile,
287
+ getPropInsensitive,
288
+ setPropInsensitive,
289
+ applyDefaultAgent,
290
+ applyIPv6Rotations,
291
+ applyDefaultHeaders,
292
+ generateClientPlaybackNonce,
293
+ applyPlayerClients
294
+ }
@@ -0,0 +1,49 @@
1
+ 'use strict'
2
+
3
/**
 * Left-pad a value with zeros.
 *
 * @param {*} num - value to format; converted with `String`.
 * @param {number} [size=2] - minimum width of the result.
 * @returns {string}
 */
function pad(num, size = 2) {
  const digits = String(num)
  return digits.padStart(size, '0')
}
6
+
7
/**
 * Format a number of seconds as an SRT timestamp, "HH:MM:SS,mmm".
 *
 * @param {number} sec - non-negative time in seconds, possibly fractional.
 * @returns {string}
 */
function secToTime(sec) {
  // Work in whole milliseconds. Taking the fraction with `sec % 1` and
  // flooring it loses a millisecond to floating-point error (1.001 -> ",000").
  const totalMs = Math.round(sec * 1000)
  const ms = totalMs % 1000
  const totalSec = Math.floor(totalMs / 1000)

  const s = totalSec % 60
  const m = Math.floor(totalSec / 60) % 60
  const h = Math.floor(totalSec / 3600)

  return `${pad(h)}:${pad(m)}:${pad(s)},${pad(ms, 3)}`
}
22
+
23
+ module.exports = function xmlToSrt(xml) {
24
+ const entries = [...xml.matchAll(
25
+ /<text start="([\d.]+)" dur="([\d.]+)">(.*?)<\/text>/gs
26
+ )]
27
+
28
+ let i = 1
29
+ let out = ''
30
+
31
+ for (const [, start, dur, text] of entries) {
32
+ const s = Number(start)
33
+ const e = s + Number(dur)
34
+
35
+ out +=
36
+ i++ + '\n' +
37
+ secToTime(s) + ' --> ' + secToTime(e) + '\n' +
38
+ text
39
+ .replace(/&amp;/g, '&')
40
+ .replace(/&lt;/g, '<')
41
+ .replace(/&gt;/g, '>')
42
+ .replace(/&#39;/g, "'")
43
+ .replace(/&quot;/g, '"')
44
+ .trim() +
45
+ '\n\n'
46
+ }
47
+
48
+ return out.trim()
49
+ }
package/package.json ADDED
@@ -0,0 +1,80 @@
1
+ {
2
+ "name": "cloud-ytdl",
3
+ "version": "1.0.0-rc",
4
+ "description": "Reliable YouTube downloader and scraper using InnerTube clients. Includes video, stream, and Community Post extraction. Production-ready for Node.js 18+.",
5
+ "keywords": [
6
+ "youtube",
7
+ "ytdl",
8
+ "ytdl-core",
9
+ "downloader",
10
+ "video",
11
+ "audio",
12
+ "stream",
13
+ "innertube",
14
+ "signature-decoding",
15
+ "cipher",
16
+ "hls",
17
+ "dash",
18
+ "m3u8",
19
+ "community-post",
20
+ "poll",
21
+ "cjs",
22
+ "node18"
23
+ ],
24
+ "author": "AlfiDev (https://github.com/cloudkuimages)",
25
+ "license": "MIT",
26
+ "repository": {
27
+ "type": "git",
28
+ "url": "https://github.com/cloudkuimages/ytdl-cloud.git"
29
+ },
30
+ "homepage": "https://github.com/cloudkuimages/ytdl-cloud",
31
+ "bugs": {
32
+ "url": "https://github.com/cloudkuimages/ytdl-cloud/issues"
33
+ },
34
+ "type": "commonjs",
35
+ "main": "./lib/index.js",
36
+ "types": "./types/index.d.ts",
37
+ "exports": {
38
+ ".": {
39
+ "require": "./lib/index.js",
40
+ "types": "./types/index.d.ts"
41
+ },
42
+ "./agents": "./lib/agents.js",
43
+ "./cache": "./lib/cache.js",
44
+ "./format": "./lib/format.js",
45
+ "./format-utils": "./lib/format-utils.js",
46
+ "./info": "./lib/info.js",
47
+ "./innertube": "./lib/innertube.js",
48
+ "./post": "./lib/post.js",
49
+ "./sig-decoder": "./lib/sig-decoder.js",
50
+ "./url-utils": "./lib/url-utils.js",
51
+ "./utils": "./lib/utils.js"
52
+ },
53
+ "files": [
54
+ "lib",
55
+ "types"
56
+ ],
57
+ "engines": {
58
+ "node": ">=18"
59
+ },
60
+ "scripts": {
61
+ "lint": "eslint .",
62
+ "lint:fix": "eslint . --fix",
63
+ "test": "mocha --timeout 10000"
64
+ },
65
+ "dependencies": {
66
+ "http-cookie-agent": "^6.0.8",
67
+ "https-proxy-agent": "^7.0.6",
68
+ "m3u8stream": "^0.8.6",
69
+ "miniget": "^4.2.3",
70
+ "sax": "^1.4.1",
71
+ "tough-cookie": "^4.1.4",
72
+ "undici": "^6.0.0"
73
+ },
74
+ "devDependencies": {
75
+ "@types/node": "^18.19.0",
76
+ "eslint": "^8.56.0",
77
+ "mocha": "^10.3.0",
78
+ "typescript": "^5.3.3"
79
+ }
80
+ }