@nadimtuhin/ytranscript 1.0.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,5 +1,5 @@
1
- #!/usr/bin/env bun
2
- // @bun
1
+ #!/usr/bin/env node
2
+ import {createRequire} from "node:module";
3
3
  var __create = Object.create;
4
4
  var __getProtoOf = Object.getPrototypeOf;
5
5
  var __defProp = Object.defineProperty;
@@ -17,6 +17,7 @@ var __toESM = (mod, isNodeMode, target) => {
17
17
  return to;
18
18
  };
19
19
  var __commonJS = (cb, mod) => () => (mod || cb((mod = { exports: {} }).exports, mod), mod.exports);
20
+ var __require = /* @__PURE__ */ createRequire(import.meta.url);
20
21
 
21
22
  // node_modules/commander/lib/error.js
22
23
  var require_error = __commonJS((exports) => {
@@ -616,11 +617,11 @@ var require_command = __commonJS((exports) => {
616
617
  return arg;
617
618
  });
618
619
  }
619
- var EventEmitter = import.meta.require("events").EventEmitter;
620
- var childProcess = import.meta.require("child_process");
621
- var path = import.meta.require("path");
622
- var fs = import.meta.require("fs");
623
- var process2 = import.meta.require("process");
620
+ var EventEmitter = __require("node:events").EventEmitter;
621
+ var childProcess = __require("node:child_process");
622
+ var path = __require("node:path");
623
+ var fs = __require("node:fs");
624
+ var process2 = __require("node:process");
624
625
  var { Argument, humanReadableArgName } = require_argument();
625
626
  var { CommanderError } = require_error();
626
627
  var { Help } = require_help();
@@ -1844,6 +1845,9 @@ var require_commander = __commonJS((exports) => {
1844
1845
  exports.InvalidOptionArgumentError = InvalidArgumentError;
1845
1846
  });
1846
1847
 
1848
+ // src/cli.ts
1849
+ import {readFile as readFile2, writeFile as writeFile2} from "node:fs/promises";
1850
+
1847
1851
  // node_modules/commander/esm.mjs
1848
1852
  var import_ = __toESM(require_commander(), 1);
1849
1853
  var {
@@ -1860,9 +1864,10 @@ var {
1860
1864
  Help
1861
1865
  } = import_.default;
1862
1866
  // package.json
1863
- var version = "1.0.2";
1867
+ var version = "1.2.0";
1864
1868
 
1865
1869
  // src/lib/fetcher.ts
1870
+ import {createRequire as createRequire2} from "node:module";
1866
1871
  function extractVideoId(input) {
1867
1872
  if (/^[a-zA-Z0-9_-]{11}$/.test(input)) {
1868
1873
  return input;
@@ -1888,9 +1893,20 @@ function extractVideoId(input) {
1888
1893
  }
1889
1894
  return null;
1890
1895
  }
1891
- async function fetchPlayerResponse(videoId, timeout) {
1896
+ function createProxyAgent(proxy) {
1897
+ if (proxy) {
1898
+ return new ProxyAgent(proxy.url);
1899
+ }
1900
+ const envProxy = process.env.HTTP_PROXY || process.env.http_proxy || process.env.HTTPS_PROXY || process.env.https_proxy;
1901
+ if (envProxy) {
1902
+ return new ProxyAgent(envProxy);
1903
+ }
1904
+ return;
1905
+ }
1906
+ async function fetchPlayerResponse(videoId, timeout, proxy) {
1892
1907
  const controller = new AbortController;
1893
1908
  const timeoutId = setTimeout(() => controller.abort(), timeout);
1909
+ const dispatcher = createProxyAgent(proxy);
1894
1910
  try {
1895
1911
  const response = await fetch("https://www.youtube.com/youtubei/v1/player?prettyPrint=false", {
1896
1912
  method: "POST",
@@ -1907,7 +1923,8 @@ async function fetchPlayerResponse(videoId, timeout) {
1907
1923
  },
1908
1924
  videoId
1909
1925
  }),
1910
- signal: controller.signal
1926
+ signal: controller.signal,
1927
+ ...dispatcher && { dispatcher }
1911
1928
  });
1912
1929
  if (!response.ok) {
1913
1930
  throw new Error(`HTTP ${response.status}: ${response.statusText}`);
@@ -1917,14 +1934,16 @@ async function fetchPlayerResponse(videoId, timeout) {
1917
1934
  clearTimeout(timeoutId);
1918
1935
  }
1919
1936
  }
1920
- async function fetchCaptionTrack(url, timeout) {
1937
+ async function fetchCaptionTrack(url, timeout, proxy) {
1921
1938
  const controller = new AbortController;
1922
1939
  const timeoutId = setTimeout(() => controller.abort(), timeout);
1940
+ const dispatcher = createProxyAgent(proxy);
1923
1941
  try {
1924
1942
  const jsonUrl = `${url}&fmt=json3`;
1925
1943
  const response = await fetch(jsonUrl, {
1926
1944
  headers: { "User-Agent": USER_AGENT },
1927
- signal: controller.signal
1945
+ signal: controller.signal,
1946
+ ...dispatcher && { dispatcher }
1928
1947
  });
1929
1948
  if (!response.ok) {
1930
1949
  throw new Error(`HTTP ${response.status}`);
@@ -1962,9 +1981,14 @@ function selectCaptionTrack(tracks, preferredLanguages, includeAutoGenerated) {
1962
1981
  }
1963
1982
  return searchOrder[0] || null;
1964
1983
  }
1984
+ async function fetchVideoInfo(videoId, options = {}) {
1985
+ const { timeout = 30000, proxy } = options;
1986
+ const playerResponse = await fetchPlayerResponse(videoId, timeout, proxy);
1987
+ return playerResponse.captions?.playerCaptionsTracklistRenderer?.captionTracks || [];
1988
+ }
1965
1989
  async function fetchTranscript(videoId, options = {}) {
1966
- const { languages = ["en"], timeout = 30000, includeAutoGenerated = true } = options;
1967
- const playerResponse = await fetchPlayerResponse(videoId, timeout);
1990
+ const { languages = ["en"], timeout = 30000, includeAutoGenerated = true, proxy } = options;
1991
+ const playerResponse = await fetchPlayerResponse(videoId, timeout, proxy);
1968
1992
  const captionTracks = playerResponse.captions?.playerCaptionsTracklistRenderer?.captionTracks;
1969
1993
  if (!captionTracks?.length) {
1970
1994
  throw new Error("No captions available for this video");
@@ -1973,7 +1997,7 @@ async function fetchTranscript(videoId, options = {}) {
1973
1997
  if (!selectedTrack) {
1974
1998
  throw new Error("No suitable caption track found");
1975
1999
  }
1976
- const segments = await fetchCaptionTrack(selectedTrack.baseUrl, timeout);
2000
+ const segments = await fetchCaptionTrack(selectedTrack.baseUrl, timeout, proxy);
1977
2001
  if (!segments.length) {
1978
2002
  throw new Error("Caption track is empty");
1979
2003
  }
@@ -1986,6 +2010,8 @@ async function fetchTranscript(videoId, options = {}) {
1986
2010
  isAutoGenerated: selectedTrack.kind === "asr"
1987
2011
  };
1988
2012
  }
2013
+ var require2 = createRequire2(import.meta.url);
2014
+ var ProxyAgent = require2("undici/lib/dispatcher/proxy-agent");
1989
2015
  var USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
1990
2016
  // node_modules/yocto-queue/index.js
1991
2017
  class Node {
@@ -2163,6 +2189,26 @@ async function* streamVideos(videos, options = {}) {
2163
2189
  var DEFAULT_CONCURRENCY = 4;
2164
2190
  var DEFAULT_PAUSE_AFTER = 10;
2165
2191
  var DEFAULT_PAUSE_DURATION = 5000;
2192
+ // src/lib/fs.ts
2193
+ import {readFile, writeFile, appendFile, access, constants} from "node:fs/promises";
2194
+ async function fileExists(path) {
2195
+ try {
2196
+ await access(path, constants.F_OK);
2197
+ return true;
2198
+ } catch {
2199
+ return false;
2200
+ }
2201
+ }
2202
+ async function readTextFile(path) {
2203
+ return readFile(path, "utf-8");
2204
+ }
2205
+ async function writeTextFile(path, content) {
2206
+ await writeFile(path, content, "utf-8");
2207
+ }
2208
+ async function appendTextFile(path, content) {
2209
+ await appendFile(path, content, "utf-8");
2210
+ }
2211
+
2166
2212
  // src/loaders/history.ts
2167
2213
  function extractVideoIdFromUrl(url) {
2168
2214
  try {
@@ -2178,8 +2224,7 @@ function extractVideoIdFromUrl(url) {
2178
2224
  return null;
2179
2225
  }
2180
2226
  async function loadWatchHistory(filePath) {
2181
- const file = Bun.file(filePath);
2182
- const text = await file.text();
2227
+ const text = await readTextFile(filePath);
2183
2228
  const data = JSON.parse(text);
2184
2229
  const results = [];
2185
2230
  for (const item of data) {
@@ -2243,8 +2288,7 @@ function parseCSVLine(line) {
2243
2288
  return result;
2244
2289
  }
2245
2290
  async function loadWatchLater(filePath) {
2246
- const file = Bun.file(filePath);
2247
- const text = await file.text();
2291
+ const text = await readTextFile(filePath);
2248
2292
  const rows = parseCSV(text);
2249
2293
  const results = [];
2250
2294
  for (const row of rows) {
@@ -2290,11 +2334,11 @@ function mergeVideoSources(...sources) {
2290
2334
  async function loadProcessedIds(jsonlPath) {
2291
2335
  const ids = new Set;
2292
2336
  try {
2293
- const file = Bun.file(jsonlPath);
2294
- if (!await file.exists()) {
2337
+ const exists = await fileExists(jsonlPath);
2338
+ if (!exists) {
2295
2339
  return ids;
2296
2340
  }
2297
- const text = await file.text();
2341
+ const text = await readTextFile(jsonlPath);
2298
2342
  const lines = text.split("\n").filter((l) => l.trim());
2299
2343
  for (const line of lines) {
2300
2344
  try {
@@ -2313,10 +2357,7 @@ async function loadProcessedIds(jsonlPath) {
2313
2357
  }
2314
2358
  // src/outputs/index.ts
2315
2359
  async function appendJsonl(result, path) {
2316
- const file = Bun.file(path);
2317
- const existing = await file.exists() ? await file.text() : "";
2318
- const newContent = `${existing + JSON.stringify(result)}\n`;
2319
- await Bun.write(path, newContent);
2360
+ await appendTextFile(path, `${JSON.stringify(result)}\n`);
2320
2361
  }
2321
2362
  async function writeCsv(results, options) {
2322
2363
  const headers = [
@@ -2348,12 +2389,12 @@ async function writeCsv(results, options) {
2348
2389
  ...rows.map((row) => row.map((cell) => `"${String(cell).replace(/"/g, '""')}"`).join(","))
2349
2390
  ].join("\n");
2350
2391
  if (options.append) {
2351
- const file = Bun.file(options.path);
2352
- const existing = await file.exists() ? await file.text() : "";
2353
- const content = existing ? `${rows.map((row) => row.map((cell) => `"${String(cell).replace(/"/g, '""')}"`).join(",")).join("\n")}\n` : `${csvContent}\n`;
2354
- await Bun.write(options.path, existing + content);
2392
+ const exists = await fileExists(options.path);
2393
+ const rowsContent = rows.map((row) => row.map((cell) => `"${String(cell).replace(/"/g, '""')}"`).join(",")).join("\n");
2394
+ const content = exists ? `${rowsContent}\n` : `${csvContent}\n`;
2395
+ await appendTextFile(options.path, content);
2355
2396
  } else {
2356
- await Bun.write(options.path, `${csvContent}\n`);
2397
+ await writeTextFile(options.path, `${csvContent}\n`);
2357
2398
  }
2358
2399
  }
2359
2400
  function formatSrt(transcript) {
@@ -2408,20 +2449,40 @@ function pad(num, size) {
2408
2449
  return String(num).padStart(size, "0");
2409
2450
  }
2410
2451
  // src/cli.ts
2452
+ function validateProxyUrl(url) {
2453
+ try {
2454
+ const parsed = new URL(url);
2455
+ return ["http:", "https:", "socks4:", "socks5:"].includes(parsed.protocol);
2456
+ } catch {
2457
+ return false;
2458
+ }
2459
+ }
2460
+ function parseProxy(proxyUrl) {
2461
+ if (!proxyUrl)
2462
+ return;
2463
+ if (!validateProxyUrl(proxyUrl)) {
2464
+ console.error(red(`Invalid proxy URL: ${proxyUrl}`));
2465
+ console.error(dim("Expected format: http://[user:pass@]host:port"));
2466
+ process.exit(1);
2467
+ }
2468
+ return { url: proxyUrl };
2469
+ }
2411
2470
  var green = (s) => `\x1B[32m${s}\x1B[0m`;
2412
2471
  var red = (s) => `\x1B[31m${s}\x1B[0m`;
2413
2472
  var yellow = (s) => `\x1B[33m${s}\x1B[0m`;
2414
2473
  var dim = (s) => `\x1B[2m${s}\x1B[0m`;
2415
2474
  program.name("ytranscript").description("Fast YouTube transcript extraction with bulk processing").version(version);
2416
- program.command("get <video>").description("Fetch transcript for a single video (ID or URL)").option("-l, --lang <codes>", "Preferred language codes (comma-separated)", "en").option("-f, --format <format>", "Output format: text, json, srt, vtt", "text").option("-t, --timestamps", "Include timestamps in text output").option("-o, --output <file>", "Write to file instead of stdout").action(async (video, options) => {
2475
+ program.command("get <video>").description("Fetch transcript for a single video (ID or URL)").option("-l, --lang <codes>", "Preferred language codes (comma-separated)", "en").option("-f, --format <format>", "Output format: text, json, srt, vtt", "text").option("-t, --timestamps", "Include timestamps in text output").option("-o, --output <file>", "Write to file instead of stdout").option("--proxy <url>", "HTTP proxy URL (e.g., http://user:pass@host:port)").action(async (video, options) => {
2417
2476
  const videoId = extractVideoId(video);
2418
2477
  if (!videoId) {
2419
2478
  console.error(red(`Invalid video ID or URL: ${video}`));
2420
2479
  process.exit(1);
2421
2480
  }
2481
+ const proxy = parseProxy(options.proxy);
2422
2482
  try {
2423
2483
  const transcript = await fetchTranscript(videoId, {
2424
- languages: options.lang.split(",")
2484
+ languages: options.lang.split(","),
2485
+ proxy
2425
2486
  });
2426
2487
  let output;
2427
2488
  switch (options.format) {
@@ -2438,7 +2499,7 @@ program.command("get <video>").description("Fetch transcript for a single video
2438
2499
  output = formatText(transcript, options.timestamps);
2439
2500
  }
2440
2501
  if (options.output) {
2441
- await Bun.write(options.output, output);
2502
+ await writeFile2(options.output, output, "utf-8");
2442
2503
  console.log(green(`Written to ${options.output}`));
2443
2504
  } else {
2444
2505
  console.log(output);
@@ -2448,7 +2509,7 @@ program.command("get <video>").description("Fetch transcript for a single video
2448
2509
  process.exit(1);
2449
2510
  }
2450
2511
  });
2451
- program.command("bulk").description("Bulk fetch transcripts from Google Takeout or video list").option("--history <file>", "Path to Google Takeout watch-history.json").option("--watch-later <file>", "Path to Google Takeout watch-later.csv").option("--videos <ids>", "Comma-separated video IDs or URLs").option("--file <file>", "File with video IDs/URLs (one per line)").option("-o, --out-jsonl <file>", "Output JSONL file", "transcripts.jsonl").option("--out-csv <file>", "Also write to CSV file").option("-c, --concurrency <n>", "Concurrent requests", "4").option("--pause-after <n>", "Pause after N requests", "10").option("--pause-ms <n>", "Pause duration in ms", "5000").option("-l, --lang <codes>", "Preferred languages (comma-separated)", "en").option("--resume", "Resume from previous run (skip already processed)").action(async (options) => {
2512
+ program.command("bulk").description("Bulk fetch transcripts from Google Takeout or video list").option("--history <file>", "Path to Google Takeout watch-history.json").option("--watch-later <file>", "Path to Google Takeout watch-later.csv").option("--videos <ids>", "Comma-separated video IDs or URLs").option("--file <file>", "File with video IDs/URLs (one per line)").option("-o, --out-jsonl <file>", "Output JSONL file", "transcripts.jsonl").option("--out-csv <file>", "Also write to CSV file").option("-c, --concurrency <n>", "Concurrent requests", "4").option("--pause-after <n>", "Pause after N requests", "10").option("--pause-ms <n>", "Pause duration in ms", "5000").option("-l, --lang <codes>", "Preferred languages (comma-separated)", "en").option("--proxy <url>", "HTTP proxy URL (e.g., http://user:pass@host:port)").option("--resume", "Resume from previous run (skip already processed)").action(async (options) => {
2452
2513
  const sources = [];
2453
2514
  if (options.history) {
2454
2515
  console.log(dim(`Loading watch history from ${options.history}...`));
@@ -2477,7 +2538,7 @@ program.command("bulk").description("Bulk fetch transcripts from Google Takeout
2477
2538
  }
2478
2539
  if (options.file) {
2479
2540
  try {
2480
- const content = await Bun.file(options.file).text();
2541
+ const content = await readFile2(options.file, "utf-8");
2481
2542
  const ids = content.split("\n").map((l) => l.trim()).filter((l) => l && !l.startsWith("#"));
2482
2543
  sources.push(fromVideoIds(ids));
2483
2544
  console.log(` Added ${ids.length} videos from ${options.file}`);
@@ -2503,15 +2564,17 @@ program.command("bulk").description("Bulk fetch transcripts from Google Takeout
2503
2564
  console.log(green("All videos already processed!"));
2504
2565
  return;
2505
2566
  }
2506
- console.log(`Processing ${toProcess.length} videos...\n`);
2567
+ console.log(`Processing ${toProcess.length} videos...\\n`);
2507
2568
  let successCount = 0;
2508
2569
  let failCount = 0;
2509
2570
  const csvResults = [];
2571
+ const proxy = parseProxy(options.proxy);
2510
2572
  for await (const result of streamVideos(toProcess, {
2511
2573
  concurrency: Number.parseInt(options.concurrency, 10),
2512
2574
  pauseAfter: Number.parseInt(options.pauseAfter, 10),
2513
2575
  pauseDuration: Number.parseInt(options.pauseMs, 10),
2514
- languages: options.lang.split(",")
2576
+ languages: options.lang.split(","),
2577
+ proxy
2515
2578
  })) {
2516
2579
  const status = result.transcript ? green("OK") : red("FAIL");
2517
2580
  const title = result.meta.title?.slice(0, 50) || result.meta.videoId;
@@ -2533,25 +2596,15 @@ program.command("bulk").description("Bulk fetch transcripts from Google Takeout
2533
2596
  console.log(`\n${green("Done!")} ${successCount} succeeded, ${failCount} failed`);
2534
2597
  console.log(`Output: ${options.outJsonl}`);
2535
2598
  });
2536
- program.command("info <video>").description("Show available transcript languages for a video").action(async (video) => {
2599
+ program.command("info <video>").description("Show available transcript languages for a video").option("--proxy <url>", "HTTP proxy URL (e.g., http://user:pass@host:port)").action(async (video, options) => {
2537
2600
  const videoId = extractVideoId(video);
2538
2601
  if (!videoId) {
2539
2602
  console.error(red(`Invalid video ID or URL: ${video}`));
2540
2603
  process.exit(1);
2541
2604
  }
2605
+ const proxy = parseProxy(options.proxy);
2542
2606
  try {
2543
- const response = await fetch("https://www.youtube.com/youtubei/v1/player?prettyPrint=false", {
2544
- method: "POST",
2545
- headers: { "Content-Type": "application/json" },
2546
- body: JSON.stringify({
2547
- context: {
2548
- client: { clientName: "WEB", clientVersion: "2.20240101.00.00" }
2549
- },
2550
- videoId
2551
- })
2552
- });
2553
- const data = await response.json();
2554
- const tracks = data.captions?.playerCaptionsTracklistRenderer?.captionTracks || [];
2607
+ const tracks = await fetchVideoInfo(videoId, { proxy });
2555
2608
  if (!tracks.length) {
2556
2609
  console.log(yellow("No captions available for this video"));
2557
2610
  return;
@@ -0,0 +1,30 @@
1
+ /**
2
+ * ytranscript - Fast YouTube transcript extraction
3
+ *
4
+ * @example
5
+ * ```typescript
6
+ * import { fetchTranscript, processVideos } from 'ytranscript';
7
+ *
8
+ * // Fetch a single transcript
9
+ * const transcript = await fetchTranscript('dQw4w9WgXcQ');
10
+ * console.log(transcript.text);
11
+ *
12
+ * // Bulk process from Google Takeout
13
+ * import { loadWatchHistory, loadWatchLater, mergeVideoSources } from 'ytranscript';
14
+ *
15
+ * const history = await loadWatchHistory('./watch-history.json');
16
+ * const watchLater = await loadWatchLater('./watch-later.csv');
17
+ * const videos = mergeVideoSources(history, watchLater);
18
+ *
19
+ * const results = await processVideos(videos, {
20
+ * concurrency: 4,
21
+ * onProgress: (done, total) => console.log(`${done}/${total}`)
22
+ * });
23
+ * ```
24
+ */
25
+ export { fetchTranscript, extractVideoId, fetchVideoInfo } from './lib/fetcher';
26
+ export { processVideos, streamVideos } from './lib/processor';
27
+ export { loadWatchHistory, loadWatchLater, fromVideoIds, mergeVideoSources, loadProcessedIds, } from './loaders';
28
+ export { writeJsonl, appendJsonl, writeCsv, formatSrt, formatVtt, formatText, } from './outputs';
29
+ export type { ProxyConfig, Transcript, TranscriptSegment, TranscriptResult, WatchHistoryMeta, FetchOptions, BulkOptions, OutputFormat, OutputOptions, } from './types';
30
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAGH,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAGhF,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAG9D,OAAO,EACL,gBAAgB,EAChB,cAAc,EACd,YAAY,EACZ,iBAAiB,EACjB,gBAAgB,GACjB,MAAM,WAAW,CAAC;AAGnB,OAAO,EACL,UAAU,EACV,WAAW,EACX,QAAQ,EACR,SAAS,EACT,SAAS,EACT,UAAU,GACX,MAAM,WAAW,CAAC;AAGnB,YAAY,EACV,WAAW,EACX,UAAU,EACV,iBAAiB,EACjB,gBAAgB,EAChB,gBAAgB,EAChB,YAAY,EACZ,WAAW,EACX,YAAY,EACZ,aAAa,GACd,MAAM,SAAS,CAAC"}
package/dist/index.js CHANGED
@@ -1,4 +1,5 @@
1
1
  // src/lib/fetcher.ts
2
+ import {createRequire} from "node:module";
2
3
  function extractVideoId(input) {
3
4
  if (/^[a-zA-Z0-9_-]{11}$/.test(input)) {
4
5
  return input;
@@ -24,9 +25,20 @@ function extractVideoId(input) {
24
25
  }
25
26
  return null;
26
27
  }
27
- async function fetchPlayerResponse(videoId, timeout) {
28
+ function createProxyAgent(proxy) {
29
+ if (proxy) {
30
+ return new ProxyAgent(proxy.url);
31
+ }
32
+ const envProxy = process.env.HTTP_PROXY || process.env.http_proxy || process.env.HTTPS_PROXY || process.env.https_proxy;
33
+ if (envProxy) {
34
+ return new ProxyAgent(envProxy);
35
+ }
36
+ return;
37
+ }
38
+ async function fetchPlayerResponse(videoId, timeout, proxy) {
28
39
  const controller = new AbortController;
29
40
  const timeoutId = setTimeout(() => controller.abort(), timeout);
41
+ const dispatcher = createProxyAgent(proxy);
30
42
  try {
31
43
  const response = await fetch("https://www.youtube.com/youtubei/v1/player?prettyPrint=false", {
32
44
  method: "POST",
@@ -43,7 +55,8 @@ async function fetchPlayerResponse(videoId, timeout) {
43
55
  },
44
56
  videoId
45
57
  }),
46
- signal: controller.signal
58
+ signal: controller.signal,
59
+ ...dispatcher && { dispatcher }
47
60
  });
48
61
  if (!response.ok) {
49
62
  throw new Error(`HTTP ${response.status}: ${response.statusText}`);
@@ -53,14 +66,16 @@ async function fetchPlayerResponse(videoId, timeout) {
53
66
  clearTimeout(timeoutId);
54
67
  }
55
68
  }
56
- async function fetchCaptionTrack(url, timeout) {
69
+ async function fetchCaptionTrack(url, timeout, proxy) {
57
70
  const controller = new AbortController;
58
71
  const timeoutId = setTimeout(() => controller.abort(), timeout);
72
+ const dispatcher = createProxyAgent(proxy);
59
73
  try {
60
74
  const jsonUrl = `${url}&fmt=json3`;
61
75
  const response = await fetch(jsonUrl, {
62
76
  headers: { "User-Agent": USER_AGENT },
63
- signal: controller.signal
77
+ signal: controller.signal,
78
+ ...dispatcher && { dispatcher }
64
79
  });
65
80
  if (!response.ok) {
66
81
  throw new Error(`HTTP ${response.status}`);
@@ -98,9 +113,14 @@ function selectCaptionTrack(tracks, preferredLanguages, includeAutoGenerated) {
98
113
  }
99
114
  return searchOrder[0] || null;
100
115
  }
116
+ async function fetchVideoInfo(videoId, options = {}) {
117
+ const { timeout = 30000, proxy } = options;
118
+ const playerResponse = await fetchPlayerResponse(videoId, timeout, proxy);
119
+ return playerResponse.captions?.playerCaptionsTracklistRenderer?.captionTracks || [];
120
+ }
101
121
  async function fetchTranscript(videoId, options = {}) {
102
- const { languages = ["en"], timeout = 30000, includeAutoGenerated = true } = options;
103
- const playerResponse = await fetchPlayerResponse(videoId, timeout);
122
+ const { languages = ["en"], timeout = 30000, includeAutoGenerated = true, proxy } = options;
123
+ const playerResponse = await fetchPlayerResponse(videoId, timeout, proxy);
104
124
  const captionTracks = playerResponse.captions?.playerCaptionsTracklistRenderer?.captionTracks;
105
125
  if (!captionTracks?.length) {
106
126
  throw new Error("No captions available for this video");
@@ -109,7 +129,7 @@ async function fetchTranscript(videoId, options = {}) {
109
129
  if (!selectedTrack) {
110
130
  throw new Error("No suitable caption track found");
111
131
  }
112
- const segments = await fetchCaptionTrack(selectedTrack.baseUrl, timeout);
132
+ const segments = await fetchCaptionTrack(selectedTrack.baseUrl, timeout, proxy);
113
133
  if (!segments.length) {
114
134
  throw new Error("Caption track is empty");
115
135
  }
@@ -122,6 +142,8 @@ async function fetchTranscript(videoId, options = {}) {
122
142
  isAutoGenerated: selectedTrack.kind === "asr"
123
143
  };
124
144
  }
145
+ var require2 = createRequire(import.meta.url);
146
+ var ProxyAgent = require2("undici/lib/dispatcher/proxy-agent");
125
147
  var USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
126
148
  // node_modules/yocto-queue/index.js
127
149
  class Node {
@@ -344,6 +366,26 @@ async function* streamVideos(videos, options = {}) {
344
366
  var DEFAULT_CONCURRENCY = 4;
345
367
  var DEFAULT_PAUSE_AFTER = 10;
346
368
  var DEFAULT_PAUSE_DURATION = 5000;
369
+ // src/lib/fs.ts
370
+ import {readFile, writeFile, appendFile, access, constants} from "node:fs/promises";
371
+ async function fileExists(path) {
372
+ try {
373
+ await access(path, constants.F_OK);
374
+ return true;
375
+ } catch {
376
+ return false;
377
+ }
378
+ }
379
+ async function readTextFile(path) {
380
+ return readFile(path, "utf-8");
381
+ }
382
+ async function writeTextFile(path, content) {
383
+ await writeFile(path, content, "utf-8");
384
+ }
385
+ async function appendTextFile(path, content) {
386
+ await appendFile(path, content, "utf-8");
387
+ }
388
+
347
389
  // src/loaders/history.ts
348
390
  function extractVideoIdFromUrl(url) {
349
391
  try {
@@ -359,8 +401,7 @@ function extractVideoIdFromUrl(url) {
359
401
  return null;
360
402
  }
361
403
  async function loadWatchHistory(filePath) {
362
- const file = Bun.file(filePath);
363
- const text = await file.text();
404
+ const text = await readTextFile(filePath);
364
405
  const data = JSON.parse(text);
365
406
  const results = [];
366
407
  for (const item of data) {
@@ -424,8 +465,7 @@ function parseCSVLine(line) {
424
465
  return result;
425
466
  }
426
467
  async function loadWatchLater(filePath) {
427
- const file = Bun.file(filePath);
428
- const text = await file.text();
468
+ const text = await readTextFile(filePath);
429
469
  const rows = parseCSV(text);
430
470
  const results = [];
431
471
  for (const row of rows) {
@@ -471,11 +511,11 @@ function mergeVideoSources(...sources) {
471
511
  async function loadProcessedIds(jsonlPath) {
472
512
  const ids = new Set;
473
513
  try {
474
- const file = Bun.file(jsonlPath);
475
- if (!await file.exists()) {
514
+ const exists = await fileExists(jsonlPath);
515
+ if (!exists) {
476
516
  return ids;
477
517
  }
478
- const text = await file.text();
518
+ const text = await readTextFile(jsonlPath);
479
519
  const lines = text.split("\n").filter((l) => l.trim());
480
520
  for (const line of lines) {
481
521
  try {
@@ -497,16 +537,13 @@ async function writeJsonl(results, options) {
497
537
  const lines = results.map((r) => JSON.stringify(r));
498
538
  const content = `${lines.join("\n")}\n`;
499
539
  if (options.append) {
500
- await Bun.write(options.path, content, { mode: "a" });
540
+ await appendTextFile(options.path, content);
501
541
  } else {
502
- await Bun.write(options.path, content);
542
+ await writeTextFile(options.path, content);
503
543
  }
504
544
  }
505
545
  async function appendJsonl(result, path) {
506
- const file = Bun.file(path);
507
- const existing = await file.exists() ? await file.text() : "";
508
- const newContent = `${existing + JSON.stringify(result)}\n`;
509
- await Bun.write(path, newContent);
546
+ await appendTextFile(path, `${JSON.stringify(result)}\n`);
510
547
  }
511
548
  async function writeCsv(results, options) {
512
549
  const headers = [
@@ -538,12 +575,12 @@ async function writeCsv(results, options) {
538
575
  ...rows.map((row) => row.map((cell) => `"${String(cell).replace(/"/g, '""')}"`).join(","))
539
576
  ].join("\n");
540
577
  if (options.append) {
541
- const file = Bun.file(options.path);
542
- const existing = await file.exists() ? await file.text() : "";
543
- const content = existing ? `${rows.map((row) => row.map((cell) => `"${String(cell).replace(/"/g, '""')}"`).join(",")).join("\n")}\n` : `${csvContent}\n`;
544
- await Bun.write(options.path, existing + content);
578
+ const exists = await fileExists(options.path);
579
+ const rowsContent = rows.map((row) => row.map((cell) => `"${String(cell).replace(/"/g, '""')}"`).join(",")).join("\n");
580
+ const content = exists ? `${rowsContent}\n` : `${csvContent}\n`;
581
+ await appendTextFile(options.path, content);
545
582
  } else {
546
- await Bun.write(options.path, `${csvContent}\n`);
583
+ await writeTextFile(options.path, `${csvContent}\n`);
547
584
  }
548
585
  }
549
586
  function formatSrt(transcript) {
@@ -610,6 +647,7 @@ export {
610
647
  formatVtt,
611
648
  formatText,
612
649
  formatSrt,
650
+ fetchVideoInfo,
613
651
  fetchTranscript,
614
652
  extractVideoId,
615
653
  appendJsonl
@@ -0,0 +1,26 @@
1
+ /**
2
+ * YouTube transcript fetcher using YouTube's innertube API
3
+ * No third-party services required
4
+ */
5
+ import type { FetchOptions, Transcript } from '../types';
6
+ export interface CaptionTrack {
7
+ baseUrl: string;
8
+ languageCode: string;
9
+ kind?: string;
10
+ name?: {
11
+ simpleText?: string;
12
+ };
13
+ }
14
+ /**
15
+ * Extract video ID from various YouTube URL formats
16
+ */
17
+ export declare function extractVideoId(input: string): string | null;
18
+ /**
19
+ * Fetch available caption tracks for a video
20
+ */
21
+ export declare function fetchVideoInfo(videoId: string, options?: FetchOptions): Promise<CaptionTrack[]>;
22
+ /**
23
+ * Fetch transcript for a single video
24
+ */
25
+ export declare function fetchTranscript(videoId: string, options?: FetchOptions): Promise<Transcript>;
26
+ //# sourceMappingURL=fetcher.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetcher.d.ts","sourceRoot":"","sources":["../../src/lib/fetcher.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH,OAAO,KAAK,EAAE,YAAY,EAAe,UAAU,EAAqB,MAAM,UAAU,CAAC;AASzF,MAAM,WAAW,YAAY;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE;QAAE,UAAU,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;CAChC;AAUD;;GAEG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CA+B3D;AAsJD;;GAEG;AACH,wBAAsB,cAAc,CAClC,OAAO,EAAE,MAAM,EACf,OAAO,GAAE,YAAiB,GACzB,OAAO,CAAC,YAAY,EAAE,CAAC,CAIzB;AAED;;GAEG;AACH,wBAAsB,eAAe,CACnC,OAAO,EAAE,MAAM,EACf,OAAO,GAAE,YAAiB,GACzB,OAAO,CAAC,UAAU,CAAC,CAgCrB"}
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Cross-runtime file utilities (works with Node.js and Bun)
3
+ */
4
+ /**
5
+ * Check if a file exists
6
+ */
7
+ export declare function fileExists(path: string): Promise<boolean>;
8
+ /**
9
+ * Read file contents as text
10
+ */
11
+ export declare function readTextFile(path: string): Promise<string>;
12
+ /**
13
+ * Write content to file (overwrites existing)
14
+ */
15
+ export declare function writeTextFile(path: string, content: string): Promise<void>;
16
+ /**
17
+ * Append content to file
18
+ */
19
+ export declare function appendTextFile(path: string, content: string): Promise<void>;
20
+ //# sourceMappingURL=fs.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fs.d.ts","sourceRoot":"","sources":["../../src/lib/fs.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH;;GAEG;AACH,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAO/D;AAED;;GAEG;AACH,wBAAsB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAEhE;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAEhF;AAED;;GAEG;AACH,wBAAsB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAEjF"}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Bulk transcript processor with concurrency control,
3
+ * rate limiting, and resume support
4
+ */
5
+ import type { BulkOptions, TranscriptResult, WatchHistoryMeta } from '../types';
6
+ /**
7
+ * Process multiple videos in bulk with concurrency control
8
+ */
9
+ export declare function processVideos(videos: WatchHistoryMeta[], options?: BulkOptions): Promise<TranscriptResult[]>;
10
+ /**
11
+ * Create a streaming processor that yields results as they complete
12
+ */
13
+ export declare function streamVideos(videos: WatchHistoryMeta[], options?: BulkOptions): AsyncGenerator<TranscriptResult>;
14
+ //# sourceMappingURL=processor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"processor.d.ts","sourceRoot":"","sources":["../../src/lib/processor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,KAAK,EAAE,WAAW,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAOhF;;GAEG;AACH,wBAAsB,aAAa,CACjC,MAAM,EAAE,gBAAgB,EAAE,EAC1B,OAAO,GAAE,WAAgB,GACxB,OAAO,CAAC,gBAAgB,EAAE,CAAC,CA2D7B;AAED;;GAEG;AACH,wBAAuB,YAAY,CACjC,MAAM,EAAE,gBAAgB,EAAE,EAC1B,OAAO,GAAE,WAAgB,GACxB,cAAc,CAAC,gBAAgB,CAAC,CAwClC"}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Load YouTube watch history from Google Takeout JSON
3
+ */
4
+ import type { WatchHistoryMeta } from '../types';
5
+ /**
6
+ * Load watch history from Google Takeout JSON file
7
+ */
8
+ export declare function loadWatchHistory(filePath: string): Promise<WatchHistoryMeta[]>;
9
+ //# sourceMappingURL=history.d.ts.map