@nadimtuhin/ytranscript 1.0.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ {"version":3,"file":"history.d.ts","sourceRoot":"","sources":["../../src/loaders/history.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AA4BjD;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,EAAE,CAAC,CA0BpF"}
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Unified loader for various input sources
3
+ */
4
+ import type { WatchHistoryMeta } from '../types';
5
+ export { loadWatchHistory } from './history';
6
+ export { loadWatchLater } from './watchLater';
7
+ /**
8
+ * Create metadata entries from video IDs or URLs
9
+ */
10
+ export declare function fromVideoIds(inputs: string[]): WatchHistoryMeta[];
11
+ /**
12
+ * Merge multiple sources, deduplicating by video ID
13
+ * Priority: history > watch_later > manual
14
+ */
15
+ export declare function mergeVideoSources(...sources: WatchHistoryMeta[][]): WatchHistoryMeta[];
16
+ /**
17
+ * Load processed video IDs from an existing JSONL file
18
+ */
19
+ export declare function loadProcessedIds(jsonlPath: string): Promise<Set<string>>;
20
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/loaders/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAIjD,OAAO,EAAE,gBAAgB,EAAE,MAAM,WAAW,CAAC;AAC7C,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAE9C;;GAEG;AACH,wBAAgB,YAAY,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,gBAAgB,EAAE,CAejE;AAED;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,OAAO,EAAE,gBAAgB,EAAE,EAAE,GAAG,gBAAgB,EAAE,CAYtF;AAED;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CA6B9E"}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Load YouTube watch-later playlist from Google Takeout CSV
3
+ */
4
+ import type { WatchHistoryMeta } from '../types';
5
+ /**
6
+ * Load watch-later playlist from Google Takeout CSV file
7
+ */
8
+ export declare function loadWatchLater(filePath: string): Promise<WatchHistoryMeta[]>;
9
+ //# sourceMappingURL=watchLater.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"watchLater.d.ts","sourceRoot":"","sources":["../../src/loaders/watchLater.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AA6DjD;;GAEG;AACH,wBAAsB,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAqBlF"}
package/dist/mcp.d.ts ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * ytranscript MCP Server
4
+ *
5
+ * Exposes YouTube transcript extraction as MCP tools for AI assistants
6
+ */
7
+ export {};
8
+ //# sourceMappingURL=mcp.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"mcp.d.ts","sourceRoot":"","sources":["../src/mcp.ts"],"names":[],"mappings":";AACA;;;;GAIG"}
package/dist/mcp.js CHANGED
@@ -28317,6 +28317,7 @@ class StdioServerTransport {
28317
28317
  }
28318
28318
 
28319
28319
  // src/lib/fetcher.ts
28320
+ import {createRequire} from "node:module";
28320
28321
  function extractVideoId(input) {
28321
28322
  if (/^[a-zA-Z0-9_-]{11}$/.test(input)) {
28322
28323
  return input;
@@ -28342,9 +28343,20 @@ function extractVideoId(input) {
28342
28343
  }
28343
28344
  return null;
28344
28345
  }
28345
- async function fetchPlayerResponse(videoId, timeout) {
28346
+ function createProxyAgent(proxy) {
28347
+ if (proxy) {
28348
+ return new ProxyAgent(proxy.url);
28349
+ }
28350
+ const envProxy = process.env.HTTP_PROXY || process.env.http_proxy || process.env.HTTPS_PROXY || process.env.https_proxy;
28351
+ if (envProxy) {
28352
+ return new ProxyAgent(envProxy);
28353
+ }
28354
+ return;
28355
+ }
28356
+ async function fetchPlayerResponse(videoId, timeout, proxy) {
28346
28357
  const controller = new AbortController;
28347
28358
  const timeoutId = setTimeout(() => controller.abort(), timeout);
28359
+ const dispatcher = createProxyAgent(proxy);
28348
28360
  try {
28349
28361
  const response = await fetch("https://www.youtube.com/youtubei/v1/player?prettyPrint=false", {
28350
28362
  method: "POST",
@@ -28361,7 +28373,8 @@ async function fetchPlayerResponse(videoId, timeout) {
28361
28373
  },
28362
28374
  videoId
28363
28375
  }),
28364
- signal: controller.signal
28376
+ signal: controller.signal,
28377
+ ...dispatcher && { dispatcher }
28365
28378
  });
28366
28379
  if (!response.ok) {
28367
28380
  throw new Error(`HTTP ${response.status}: ${response.statusText}`);
@@ -28371,14 +28384,16 @@ async function fetchPlayerResponse(videoId, timeout) {
28371
28384
  clearTimeout(timeoutId);
28372
28385
  }
28373
28386
  }
28374
- async function fetchCaptionTrack(url2, timeout) {
28387
+ async function fetchCaptionTrack(url2, timeout, proxy) {
28375
28388
  const controller = new AbortController;
28376
28389
  const timeoutId = setTimeout(() => controller.abort(), timeout);
28390
+ const dispatcher = createProxyAgent(proxy);
28377
28391
  try {
28378
28392
  const jsonUrl = `${url2}&fmt=json3`;
28379
28393
  const response = await fetch(jsonUrl, {
28380
28394
  headers: { "User-Agent": USER_AGENT },
28381
- signal: controller.signal
28395
+ signal: controller.signal,
28396
+ ...dispatcher && { dispatcher }
28382
28397
  });
28383
28398
  if (!response.ok) {
28384
28399
  throw new Error(`HTTP ${response.status}`);
@@ -28417,8 +28432,8 @@ function selectCaptionTrack(tracks, preferredLanguages, includeAutoGenerated) {
28417
28432
  return searchOrder[0] || null;
28418
28433
  }
28419
28434
  async function fetchTranscript(videoId, options = {}) {
28420
- const { languages = ["en"], timeout = 30000, includeAutoGenerated = true } = options;
28421
- const playerResponse = await fetchPlayerResponse(videoId, timeout);
28435
+ const { languages = ["en"], timeout = 30000, includeAutoGenerated = true, proxy } = options;
28436
+ const playerResponse = await fetchPlayerResponse(videoId, timeout, proxy);
28422
28437
  const captionTracks = playerResponse.captions?.playerCaptionsTracklistRenderer?.captionTracks;
28423
28438
  if (!captionTracks?.length) {
28424
28439
  throw new Error("No captions available for this video");
@@ -28427,7 +28442,7 @@ async function fetchTranscript(videoId, options = {}) {
28427
28442
  if (!selectedTrack) {
28428
28443
  throw new Error("No suitable caption track found");
28429
28444
  }
28430
- const segments = await fetchCaptionTrack(selectedTrack.baseUrl, timeout);
28445
+ const segments = await fetchCaptionTrack(selectedTrack.baseUrl, timeout, proxy);
28431
28446
  if (!segments.length) {
28432
28447
  throw new Error("Caption track is empty");
28433
28448
  }
@@ -28440,6 +28455,8 @@ async function fetchTranscript(videoId, options = {}) {
28440
28455
  isAutoGenerated: selectedTrack.kind === "asr"
28441
28456
  };
28442
28457
  }
28458
+ var require2 = createRequire(import.meta.url);
28459
+ var ProxyAgent = require2("undici/lib/dispatcher/proxy-agent");
28443
28460
  var USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
28444
28461
 
28445
28462
  // src/outputs/index.ts
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Output writers for different formats
3
+ */
4
+ import type { OutputOptions, Transcript, TranscriptResult } from '../types';
5
+ /**
6
+ * Write results to JSONL format (one JSON object per line)
7
+ */
8
+ export declare function writeJsonl(results: TranscriptResult[], options: OutputOptions): Promise<void>;
9
+ /**
10
+ * Append a single result to JSONL file (for streaming)
11
+ * Uses atomic append to avoid race conditions in concurrent writes
12
+ */
13
+ export declare function appendJsonl(result: TranscriptResult, path: string): Promise<void>;
14
+ /**
15
+ * Write results to CSV format
16
+ */
17
+ export declare function writeCsv(results: TranscriptResult[], options: OutputOptions): Promise<void>;
18
+ /**
19
+ * Format transcript as SRT subtitle format
20
+ */
21
+ export declare function formatSrt(transcript: Transcript): string;
22
+ /**
23
+ * Format transcript as VTT subtitle format
24
+ */
25
+ export declare function formatVtt(transcript: Transcript): string;
26
+ /**
27
+ * Format transcript as plain text
28
+ */
29
+ export declare function formatText(transcript: Transcript, includeTimestamps?: boolean): string;
30
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/outputs/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EAAE,aAAa,EAAE,UAAU,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAE5E;;GAEG;AACH,wBAAsB,UAAU,CAC9B,OAAO,EAAE,gBAAgB,EAAE,EAC3B,OAAO,EAAE,aAAa,GACrB,OAAO,CAAC,IAAI,CAAC,CASf;AAED;;;GAGG;AACH,wBAAsB,WAAW,CAAC,MAAM,EAAE,gBAAgB,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAEvF;AAED;;GAEG;AACH,wBAAsB,QAAQ,CAAC,OAAO,EAAE,gBAAgB,EAAE,EAAE,OAAO,EAAE,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC,CA2CjG;AAED;;GAEG;AACH,wBAAgB,SAAS,CAAC,UAAU,EAAE,UAAU,GAAG,MAAM,CAcxD;AAED;;GAEG;AACH,wBAAgB,SAAS,CAAC,UAAU,EAAE,UAAU,GAAG,MAAM,CAaxD;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,UAAU,EAAE,UAAU,EAAE,iBAAiB,UAAQ,GAAG,MAAM,CAMpF"}
@@ -0,0 +1,93 @@
1
+ /**
2
+ * Core types for ytranscript
3
+ */
4
+ /** Proxy configuration for HTTP requests */
5
+ export interface ProxyConfig {
6
+ /** HTTP proxy URL (e.g., "http://user:pass@host:port") */
7
+ url: string;
8
+ }
9
+ /** Represents a single transcript segment with timing information */
10
+ export interface TranscriptSegment {
11
+ /** The text content of this segment */
12
+ text: string;
13
+ /** Start time in seconds */
14
+ start: number;
15
+ /** Duration in seconds */
16
+ duration: number;
17
+ }
18
+ /** The complete transcript for a video */
19
+ export interface Transcript {
20
+ /** Video ID */
21
+ videoId: string;
22
+ /** Full transcript text (joined segments) */
23
+ text: string;
24
+ /** Individual segments with timestamps */
25
+ segments: TranscriptSegment[];
26
+ /** Language code (e.g., 'en', 'es') */
27
+ language: string;
28
+ /** Whether this is auto-generated */
29
+ isAutoGenerated: boolean;
30
+ }
31
+ /** Metadata for a video entry from watch history, the watch-later playlist, or manual input (see `source`) */
32
+ export interface WatchHistoryMeta {
33
+ /** Video ID */
34
+ videoId: string;
35
+ /** Video title */
36
+ title?: string;
37
+ /** Full YouTube URL */
38
+ url?: string;
39
+ /** Channel info */
40
+ channel?: {
41
+ name?: string;
42
+ url?: string;
43
+ };
44
+ /** When the video was watched */
45
+ watchedAt?: string;
46
+ /** Source of this entry */
47
+ source: 'history' | 'watch_later' | 'manual';
48
+ }
49
+ /** Result of fetching a transcript */
50
+ export interface TranscriptResult {
51
+ /** Video metadata */
52
+ meta: WatchHistoryMeta;
53
+ /** The transcript (null if failed) */
54
+ transcript: Transcript | null;
55
+ /** Error message if fetch failed */
56
+ error?: string;
57
+ }
58
+ /** Options for fetching transcripts */
59
+ export interface FetchOptions {
60
+ /** Preferred language codes (in order of preference) */
61
+ languages?: string[];
62
+ /** Timeout in milliseconds */
63
+ timeout?: number;
64
+ /** Include auto-generated transcripts */
65
+ includeAutoGenerated?: boolean;
66
+ /** Proxy configuration for avoiding rate limits */
67
+ proxy?: ProxyConfig;
68
+ }
69
+ /** Options for bulk processing */
70
+ export interface BulkOptions extends FetchOptions {
71
+ /** Max concurrent requests */
72
+ concurrency?: number;
73
+ /** Number of requests after which to pause (pause length is set by pauseDuration) */
74
+ pauseAfter?: number;
75
+ /** Pause duration in ms */
76
+ pauseDuration?: number;
77
+ /** Callback for progress updates */
78
+ onProgress?: (completed: number, total: number, result: TranscriptResult) => void;
79
+ /** Set of already-processed video IDs to skip */
80
+ skipIds?: Set<string>;
81
+ }
82
+ /** Output format types */
83
+ export type OutputFormat = 'json' | 'jsonl' | 'csv' | 'txt' | 'srt' | 'vtt';
84
+ /** Options for output writers */
85
+ export interface OutputOptions {
86
+ /** Output file path */
87
+ path: string;
88
+ /** Whether to append to existing file */
89
+ append?: boolean;
90
+ /** Include timestamps in text output */
91
+ includeTimestamps?: boolean;
92
+ }
93
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,4CAA4C;AAC5C,MAAM,WAAW,WAAW;IAC1B,0DAA0D;IAC1D,GAAG,EAAE,MAAM,CAAC;CACb;AAED,qEAAqE;AACrE,MAAM,WAAW,iBAAiB;IAChC,uCAAuC;IACvC,IAAI,EAAE,MAAM,CAAC;IACb,4BAA4B;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,0BAA0B;IAC1B,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,0CAA0C;AAC1C,MAAM,WAAW,UAAU;IACzB,eAAe;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,6CAA6C;IAC7C,IAAI,EAAE,MAAM,CAAC;IACb,0CAA0C;IAC1C,QAAQ,EAAE,iBAAiB,EAAE,CAAC;IAC9B,uCAAuC;IACvC,QAAQ,EAAE,MAAM,CAAC;IACjB,qCAAqC;IACrC,eAAe,EAAE,OAAO,CAAC;CAC1B;AAED,iDAAiD;AACjD,MAAM,WAAW,gBAAgB;IAC/B,eAAe;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,kBAAkB;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,uBAAuB;IACvB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,mBAAmB;IACnB,OAAO,CAAC,EAAE;QACR,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,GAAG,CAAC,EAAE,MAAM,CAAC;KACd,CAAC;IACF,iCAAiC;IACjC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,MAAM,EAAE,SAAS,GAAG,aAAa,GAAG,QAAQ,CAAC;CAC9C;AAED,sCAAsC;AACtC,MAAM,WAAW,gBAAgB;IAC/B,qBAAqB;IACrB,IAAI,EAAE,gBAAgB,CAAC;IACvB,sCAAsC;IACtC,UAAU,EAAE,UAAU,GAAG,IAAI,CAAC;IAC9B,oCAAoC;IACpC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,uCAAuC;AACvC,MAAM,WAAW,YAAY;IAC3B,wDAAwD;IACxD,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,8BAA8B;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,yCAAyC;IACzC,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAC/B,mDAAmD;IACnD,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB;AAED,kCAAkC;AAClC,MAAM,WAAW,WAAY,SAAQ,YAAY;IAC/C,8BAA8B;IAC9B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,2CAA2C;IAC3C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,2BAA2B;IAC3B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,oCAAoC;IACpC,UAAU,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,gBAAgB,KAAK,IAAI,CAAC;IAClF,iDAAiD;IACjD,OAAO,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;CACvB;AAED,0BAA0B;AAC1B,MAAM,MAAM,YAAY,GAAG,MAAM,GAAG,OAAO,GAAG,KAAK,GAAG,KAAK,GAAG,KAAK,GAAG,KAAK,CAAC;AAE5E,iCAAiC;AACjC,MAAM,WAAW,aAAa;IAC5B,uBAAuB;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,yCAAyC;IACzC,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,wCAAwC;IACxC,iBAAiB,CAAC,EAAE,OAAO,CAAC;CAC7B"}
package/package.json CHANGED
@@ -1,13 +1,12 @@
1
1
  {
2
2
  "name": "@nadimtuhin/ytranscript",
3
- "version": "1.0.2",
3
+ "version": "1.2.0",
4
4
  "description": "Fast YouTube transcript extraction with bulk processing, Google Takeout support, MCP server, and multiple output formats",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
7
7
  "module": "./dist/index.js",
8
8
  "types": "./dist/index.d.ts",
9
9
  "bin": {
10
- "@nadimtuhin/ytranscript": "dist/cli.js",
11
10
  "ytranscript": "dist/cli.js",
12
11
  "ytranscript-mcp": "dist/mcp.js"
13
12
  },
@@ -17,11 +16,10 @@
17
16
  "types": "./dist/index.d.ts"
18
17
  }
19
18
  },
20
- "files": [
21
- "dist"
22
- ],
19
+ "files": ["dist"],
23
20
  "scripts": {
24
- "build": "bun build ./src/index.ts --outdir ./dist --target node && bun build ./src/cli.ts --outdir ./dist --target bun && bun build ./src/mcp.ts --outdir ./dist --target node",
21
+ "build": "bun build ./src/index.ts --outdir ./dist --target node && bun build ./src/cli.ts --outdir ./dist --target node && bun build ./src/mcp.ts --outdir ./dist --target node && tsc --emitDeclarationOnly --declaration --outDir ./dist",
22
+ "build:types": "tsc --emitDeclarationOnly --declaration --outDir ./dist",
25
23
  "dev": "bun run --watch src/cli.ts",
26
24
  "dev:mcp": "bun run --watch src/mcp.ts",
27
25
  "test": "bun test",
@@ -58,9 +56,11 @@
58
56
  "commander": "^12.1.0",
59
57
  "linkedom": "^0.18.0",
60
58
  "p-limit": "^6.1.0",
59
+ "undici": "^7.18.2",
61
60
  "zod": "^4.3.5"
62
61
  },
63
62
  "engines": {
63
+ "node": ">=18.0.0",
64
64
  "bun": ">=1.0.0"
65
65
  }
66
66
  }