broadcastx 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
broadcastx/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """BroadcastX - Discover and download X/Twitter broadcast videos."""
2
+
3
+ __version__ = "0.1.0"
broadcastx/cli.py ADDED
@@ -0,0 +1,244 @@
1
+ """
2
+ BroadcastX CLI — Discover and download X/Twitter broadcast videos.
3
+
4
+ Usage:
5
+ broadcastx scan @username
6
+ broadcastx download https://x.com/i/broadcasts/...
7
+ broadcastx download --from broadcasts.json
8
+ """
9
+
10
+ import asyncio
11
+ from pathlib import Path
12
+
13
+ import click
14
+ from rich.console import Console
15
+
16
+ from . import __version__
17
+ from .config import DEFAULT_BROADCASTS_FILE, DEFAULT_BROWSER, DEFAULT_VIDEOS_DIR
18
+ from .downloader import check_ffmpeg, check_yt_dlp, download_all, download_broadcast
19
+ from .monitor import monitor_user
20
+ from .pause_detector import detect_pauses, pause_report, trim_intervals
21
+ from .scanner import scan_user
22
+ from .scrape_broadcasts import scrape_broadcasts
23
+
24
+ console = Console()
25
+
26
+
27
@click.group()
@click.version_option(version=__version__, prog_name="broadcastx")
def main():
    """BroadcastX — Discover and download X/Twitter broadcast videos."""
    # Intentionally empty: the group only hosts the sub-commands registered
    # on it with @main.command(); the docstring above is its --help text.
32
+
33
+
34
@main.command()
@click.argument("username")
@click.option("--max-scrolls", "-n", default=100, help="Maximum scroll actions (default: 100)")
@click.option("--scroll-delay", "-d", default=2.0, help="Delay between scrolls in seconds (default: 2.0)")
@click.option("--idle-timeout", "-t", default=10.0, help="Stop after N seconds with no new data (default: 10)")
@click.option("--output", "-o", default=None, help="Output JSON file path")
@click.option("--headless/--no-headless", default=False, help="Run browser headless (default: visible)")
def scan(username, max_scrolls, scroll_delay, idle_timeout, output, headless):
    """Scan a user's timeline for broadcast links.

    USERNAME can be with or without @ (e.g., @elonmusk or elonmusk).
    """
    # Map the CLI values onto the keyword names scan_user expects; note that
    # the --output option is forwarded as ``output_file``.
    scan_kwargs = {
        "username": username,
        "max_scrolls": max_scrolls,
        "scroll_delay": scroll_delay,
        "idle_timeout": idle_timeout,
        "headless": headless,
        "output_file": output,
    }
    # scan_user is a coroutine function; drive it to completion here.
    asyncio.run(scan_user(**scan_kwargs))
54
+
55
+
56
@main.command()
@click.argument("username")
@click.option("--output", "-o", default=None, help="Output JSON file path")
@click.option("--delay", default=1.0, help="Delay between API calls in seconds (default: 1.0)")
@click.option("--headless/--no-headless", default=False, help="Run browser headless (default: visible)")
@click.option("--verbose", "-v", is_flag=True, help="Show detailed output")
@click.option("--fresh", is_flag=True, help="Ignore saved state, start from beginning")
@click.option("--auth-token", default=None, help="Manual auth_token cookie (skips browser)")
@click.option("--csrf-token", default=None, help="Manual ct0/CSRF token (skips browser)")
@click.option("--user-id", default=None, help="Manual user ID (skips user ID lookup)")
def scrape(username, output, delay, headless, verbose, fresh, auth_token, csrf_token, user_id):
    """Scrape ALL past broadcasts from a user's timeline.

    Uses GraphQL API pagination. Saves cursor state so you can resume
    after rate limits. Run the same command again to continue.

    USERNAME can be with or without @ (e.g., @SpaceX or SpaceX).

    Examples:

    broadcastx scrape @SpaceX

    broadcastx scrape @SpaceX --fresh # ignore saved state

    broadcastx scrape @SpaceX --delay 2.0 -v
    """
    if fresh:
        # Imported lazily: the private state-file helper is only needed when
        # the user asks to discard the saved state.
        from .scrape_broadcasts import _state_file

        handle = username.lstrip("@")
        saved_state = _state_file(handle)
        if saved_state.exists():
            saved_state.unlink()
            console.print(f"[dim]Cleared saved state: {saved_state}[/dim]")

    # The --output option is forwarded as ``output_file``; everything else
    # keeps its CLI name.
    scrape_kwargs = {
        "username": username,
        "headless": headless,
        "output_file": output,
        "delay": delay,
        "verbose": verbose,
        "auth_token": auth_token,
        "csrf_token": csrf_token,
        "user_id": user_id,
    }
    asyncio.run(scrape_broadcasts(**scrape_kwargs))
99
+
100
+
101
@main.command()
@click.argument("username")
@click.option("--check-interval", default=30 * 60, help="Seconds between profile checks (default: 1800)")
@click.option("--live-interval", default=5 * 60, help="Seconds between live-status checks (default: 300)")
@click.option("--output", "-o", default=None, help="Monitor event JSON file path")
@click.option("--output-dir", default=None, help="Directory for downloaded videos")
@click.option("--browser", "-b", default=DEFAULT_BROWSER, help=f"Browser for yt-dlp cookies (default: {DEFAULT_BROWSER})")
@click.option("--headless/--no-headless", default=False, help="Run browser headless (default: visible)")
@click.option("--download/--no-download", default=True, help="Download when broadcast ends (default: download)")
@click.option("--once", is_flag=True, help="Run one detection cycle, useful for testing")
def monitor(username, check_interval, live_interval, output, output_dir, browser, headless, download, once):
    """Monitor a profile for current live broadcasts and download ended replays.

    USERNAME can be with or without @ (e.g., @SpaceX or SpaceX).
    """
    # The external tools are only required when downloads are enabled.
    # yt-dlp is probed first; the first missing tool aborts with exit code 1.
    if download:
        required_tools = (("yt-dlp", check_yt_dlp), ("ffmpeg", check_ffmpeg))
        for tool_name, is_available in required_tools:
            if is_available():
                continue
            console.print(f"[red]✗ {tool_name} not found.[/red]")
            console.print(f" Install with: [bold]brew install {tool_name}[/bold]")
            raise SystemExit(1)

    # The --output option is forwarded as ``output_file``.
    monitor_kwargs = {
        "username": username,
        "check_interval": check_interval,
        "live_interval": live_interval,
        "headless": headless,
        "output_file": output,
        "output_dir": output_dir,
        "browser": browser,
        "download": download,
        "once": once,
    }
    asyncio.run(monitor_user(**monitor_kwargs))
137
+
138
+
139
@main.command()
@click.argument("urls", nargs=-1)
@click.option(
    "--from",
    "from_file",
    default=None,
    # exists=True makes click reject a missing file up front with a usage
    # error instead of the command "succeeding" with nothing downloaded.
    type=click.Path(exists=True, dir_okay=False),
    help="Load URLs from a JSON file",
)
@click.option("--output-dir", "-o", default=None, help="Output directory for videos")
@click.option("--browser", "-b", default=DEFAULT_BROWSER, help=f"Browser for cookies (default: {DEFAULT_BROWSER})")
@click.option("--verbose", "-v", is_flag=True, help="Show yt-dlp output")
@click.option("--parallel", "-p", default=1, help="Number of concurrent downloads (default: 1)")
def download(urls, from_file, output_dir, browser, verbose, parallel):
    """Download broadcast video(s).

    Pass one or more broadcast URLs directly, or use --from to load from a JSON file.

    Examples:

    broadcastx download https://x.com/i/broadcasts/1vAxRkBbDRzKl

    broadcastx download --from output/broadcasts.json

    broadcastx download --from output/broadcasts.json -o ./my_videos

    Rotation correction is applied automatically: if the broadcast carries
    phone-orientation metadata, the downloaded video is re-encoded so it
    displays upright. A `.rotation.jsonl` sidecar is also written alongside
    the video for inspection.
    """
    # Pre-flight checks: both external tools are required for any download.
    if not check_yt_dlp():
        console.print("[red]✗ yt-dlp not found.[/red]")
        console.print(" Install with: [bold]brew install yt-dlp[/bold]")
        raise SystemExit(1)

    if not check_ffmpeg():
        console.print("[red]✗ ffmpeg not found.[/red]")
        console.print(" Install with: [bold]brew install ffmpeg[/bold]")
        raise SystemExit(1)

    if not urls and not from_file:
        console.print("[yellow]Provide URLs or use --from <file>.[/yellow]")
        raise SystemExit(1)

    out = Path(output_dir) if output_dir else DEFAULT_VIDEOS_DIR

    results = download_all(
        urls=list(urls),
        from_file=from_file,
        output_dir=out,
        browser=browser,
        verbose=verbose,
        parallel=parallel,
    )

    # Exit non-zero when any download failed, and also when nothing was
    # downloaded at all (e.g. every supplied URL was invalid or the JSON file
    # contained no broadcast URLs) so scripts do not mistake that for success.
    if not results or any(not r.success for r in results):
        raise SystemExit(1)
193
+
194
+
195
def _find_downloaded_video(broadcast_url, videos_dir=DEFAULT_VIDEOS_DIR):
    """Return the downloaded ``.mp4`` for *broadcast_url* in *videos_dir*, or None.

    Downloads are named by ``YTDLP_OUTPUT_TEMPLATE`` as
    ``<id> [<timestamp>] <title>.mp4``, so the file is located by its
    ``<id> `` prefix. A bare ``<id>.mp4`` is still accepted first for
    backward compatibility. Previously trimmed outputs are ignored.
    """
    # Function-scope import: cli.py does not import this name at module level.
    from .config import extract_broadcast_id

    # Prefer the regex-based extractor (handles query strings and trailing
    # slashes); fall back to the last path component for unrecognised URLs.
    broadcast_id = extract_broadcast_id(broadcast_url) or broadcast_url.rstrip("/").split("/")[-1]

    videos_dir = Path(videos_dir)
    bare = videos_dir / f"{broadcast_id}.mp4"
    if bare.exists():
        return bare
    if not videos_dir.is_dir():
        return None

    prefix = f"{broadcast_id} "
    candidates = sorted(
        entry
        for entry in videos_dir.iterdir()
        if entry.is_file()
        and entry.suffix == ".mp4"
        and entry.name.startswith(prefix)
        and not entry.name.endswith(".trimmed.mp4")
    )
    return candidates[0] if candidates else None


@main.command()
@click.argument("broadcast_url")
@click.option("--browser", "-b", default=DEFAULT_BROWSER, help=f"Browser for cookies (default: {DEFAULT_BROWSER})")
@click.option("--trim/--detect-only", default=False, help="Actually trim paused sections (default: detect only)")
@click.option("--output", "-o", default=None, help="Output video for --trim (default: <video>.trimmed.mp4)")
@click.option("--size-ratio", default=0.50, help="Size-drop threshold (default 0.50)")
@click.option("--gap-density", default=0.50, help="PDT-gap density threshold (default 0.50)")
@click.option("--min-pause", default=10.0, help="Minimum pause duration in seconds (default 10)")
def trim_pauses(broadcast_url, browser, trim, output, size_ratio, gap_density, min_pause):
    """Detect (and optionally trim) paused sections in a broadcast.

    Analyses HLS segments via HTTP HEAD requests (no full download) and
    playlist PDT timestamps to find sections where the video was paused
    while audio continued. Default: detect-only. Pass --trim to cut.
    """
    # Function-scope import; escapes dynamic text (paths, titles, exception
    # messages) so Rich does not interpret "[...]" fragments as markup tags.
    from rich.markup import escape

    # ffmpeg is only needed when actually cutting the video.
    if trim and not check_ffmpeg():
        console.print("[red]ffmpeg not found - install with: brew install ffmpeg[/red]")
        raise SystemExit(1)

    console.print("[bold]Analysing HLS segments for pauses...[/bold]")

    pauses = detect_pauses(
        broadcast_url,
        browser=browser,
        size_ratio_threshold=size_ratio,
        gap_density_threshold=gap_density,
        min_pause_sec=min_pause,
    )

    console.print(pause_report(pauses))

    if not trim:
        return
    if not pauses:
        console.print("[green]Nothing to trim.")
        return

    video_path = _find_downloaded_video(broadcast_url)
    if video_path is None:
        console.print(f"[red]Video not found in {escape(str(DEFAULT_VIDEOS_DIR))} for {escape(broadcast_url)}")
        console.print(" Download first: broadcastx download <url>")
        raise SystemExit(1)

    # Only the final extension is rewritten; a ".mp4" elsewhere in the path
    # (e.g. inside the title) is left untouched.
    out = Path(output) if output else video_path.with_name(f"{video_path.stem}.trimmed.mp4")
    console.print(f"\n[bold]Trimming -> {escape(str(out))}...")
    try:
        trim_intervals(video_path, pauses, out)
    except Exception as e:
        # Top-level CLI boundary: report any trimming failure and exit 1.
        console.print(f" [red]Failed: {escape(str(e))}")
        raise SystemExit(1) from e
    console.print(f" [green]Done -> {escape(str(out))}")


# The entry-point guard must come after every command is registered on
# ``main``; otherwise commands defined below it are unknown when this file is
# executed directly.
if __name__ == "__main__":
    main()
broadcastx/config.py ADDED
@@ -0,0 +1,67 @@
1
+ """Shared configuration and constants for BroadcastX."""
2
+
3
+ import re
4
+ from pathlib import Path
5
+
6
+ # Default output directory (relative to cwd)
7
+ DEFAULT_OUTPUT_DIR = Path("output")
8
+ DEFAULT_VIDEOS_DIR = DEFAULT_OUTPUT_DIR / "videos"
9
+ DEFAULT_BROADCASTS_FILE = DEFAULT_OUTPUT_DIR / "broadcasts.json"
10
+
11
+ # Browser to extract cookies from (for yt-dlp)
12
+ DEFAULT_BROWSER = "chrome"
13
+
14
+ # X broadcast IDs observed in real broadcast URLs are opaque alphanumeric
15
+ # tokens, e.g. 1vAxRkBbDRzKl. Reject tiny fragments such as /broadcasts/1.
16
+ BROADCAST_ID_RE = r"[A-Za-z0-9]{8,}"
17
+
18
+ # Broadcast URL patterns to match
19
+ BROADCAST_PATTERNS = [
20
+ re.compile(rf"https?://(?:x|twitter)\.com/i/broadcasts/({BROADCAST_ID_RE})(?![A-Za-z0-9_])"),
21
+ re.compile(rf"https?://(?:www\.)?pscp\.tv/w/({BROADCAST_ID_RE})(?![A-Za-z0-9_])"),
22
+ ]
23
+
24
+ # Twitter GraphQL endpoints to intercept
25
+ GRAPHQL_ENDPOINTS = [
26
+ "UserTweets",
27
+ "UserTweetsAndReplies",
28
+ "TweetDetail",
29
+ "SearchTimeline",
30
+ ]
31
+
32
+ # Scanner defaults
33
+ DEFAULT_MAX_SCROLLS = 100 # Maximum number of scroll actions
34
+ DEFAULT_SCROLL_DELAY = 2.0 # Seconds between scrolls
35
+ DEFAULT_IDLE_TIMEOUT = 10.0 # Stop after N seconds with no new tweets
36
+ DEFAULT_HEADLESS = False # Show browser by default (useful for login)
37
+
38
+ # yt-dlp output template — filename is "<broadcast ID> [<start timestamp>] <title>.<ext>"
39
+ YTDLP_OUTPUT_TEMPLATE = "%(id)s [%(timestamp>%Y-%m-%d %H.%M.%S)s] %(title)s.%(ext)s"
40
+
41
+
42
def ensure_output_dirs():
    """Make sure the default output and videos directories exist."""
    # Parent first, then the videos sub-directory; both calls are no-ops when
    # the directory is already present.
    for directory in (DEFAULT_OUTPUT_DIR, DEFAULT_VIDEOS_DIR):
        directory.mkdir(parents=True, exist_ok=True)
46
+
47
+
48
def extract_broadcast_id(url: str) -> str | None:
    """Return the broadcast ID found in *url*, or None if no pattern matches.

    The patterns in BROADCAST_PATTERNS are tried in order and the first
    match wins; its first capture group is the ID.
    """
    # Lazy generator: stops searching as soon as one pattern matches.
    attempts = (pattern.search(url) for pattern in BROADCAST_PATTERNS)
    first_hit = next((hit for hit in attempts if hit), None)
    return first_hit.group(1) if first_hit else None
55
+
56
+
57
def is_broadcast_url(url: str) -> bool:
    """Return True when a broadcast ID can be extracted from *url*."""
    broadcast_id = extract_broadcast_id(url)
    return broadcast_id is not None
60
+
61
+
62
def normalize_broadcast_url(url: str) -> str | None:
    """Rewrite a broadcast URL into canonical ``https://x.com/i/broadcasts/<id>`` form.

    Returns None when no broadcast ID can be extracted from *url*.
    """
    broadcast_id = extract_broadcast_id(url)
    return f"https://x.com/i/broadcasts/{broadcast_id}" if broadcast_id else None
broadcastx/downloader.py ADDED
@@ -0,0 +1,337 @@
1
+ """
2
+ Downloader module — Download broadcast videos using yt-dlp.
3
+
4
+ Usage:
5
+ from broadcastx.downloader import download_broadcast, download_all
6
+
7
+ # Single download
8
+ download_broadcast("https://x.com/i/broadcasts/1vAxRkBbDRzKl")
9
+
10
+ # Batch download from JSON file
11
+ download_all(from_file="broadcasts.json", output_dir="./videos")
12
+ """
13
+
14
+ import json
15
+ import shutil
16
+ import subprocess
17
+ import threading
18
+ from concurrent.futures import ThreadPoolExecutor, as_completed
19
+ from dataclasses import dataclass
20
+ from pathlib import Path
21
+
22
+ from rich.console import Console
23
+
24
+ from .config import (
25
+ DEFAULT_BROWSER,
26
+ DEFAULT_VIDEOS_DIR,
27
+ YTDLP_OUTPUT_TEMPLATE,
28
+ extract_broadcast_id,
29
+ is_broadcast_url,
30
+ normalize_broadcast_url,
31
+ )
32
+ from .rotation import extract_rotation_sidecar
33
+ from .rotation import rotate_video as _apply_rotation
34
+
35
+ console = Console()
36
+
37
+
38
+ @dataclass
39
+ class DownloadResult:
40
+ """Result of a single broadcast download attempt."""
41
+ url: str
42
+ success: bool
43
+ output_file: str | None = None
44
+ rotation_sidecar_file: str | None = None
45
+ rotation_applied: bool = False
46
+ error: str | None = None
47
+ warning: str | None = None
48
+
49
+
50
def check_yt_dlp() -> bool:
    """Return True when a ``yt-dlp`` executable can be found on PATH."""
    located = shutil.which("yt-dlp")
    return located is not None
53
+
54
+
55
def check_ffmpeg() -> bool:
    """Return True when an ``ffmpeg`` executable can be found on PATH."""
    located = shutil.which("ffmpeg")
    return located is not None
58
+
59
+
60
def download_broadcast(
    url: str,
    output_dir: Path = DEFAULT_VIDEOS_DIR,
    browser: str = DEFAULT_BROWSER,
    verbose: bool = False,
) -> DownloadResult:
    """
    Download a single broadcast video using yt-dlp.

    No timeout — broadcasts can be hours long. yt-dlp runs until completion
    with live output streamed to the terminal. Rotation sidecar extraction
    runs in a background thread while yt-dlp downloads; on success the
    rotation correction is applied to the downloaded file when both the
    sidecar and the output file are available.

    Args:
        url: Broadcast URL (x.com/i/broadcasts/... or pscp.tv/w/...)
        output_dir: Directory to save the video
        browser: Browser to extract cookies from
        verbose: Show yt-dlp output

    Returns:
        DownloadResult with success status and output file path. Rotation
        problems never fail the download; they are reported in ``warning``.
    """
    # Function-scope imports so this block does not rely on changes to the
    # module's import section.
    from collections import deque

    from rich.markup import escape

    normalized = normalize_broadcast_url(url)
    if not normalized:
        return DownloadResult(
            url=url,
            success=False,
            error=f"Not a valid broadcast URL: {url}",
        )

    broadcast_id = extract_broadcast_id(url)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    output_template = str(output_dir / YTDLP_OUTPUT_TEMPLATE)

    cmd = [
        "yt-dlp",
        "--cookies-from-browser", browser,
        "-f", "bestvideo+bestaudio/best",
        "--output", output_template,
        "--merge-output-format", "mp4",
        "--no-warnings",
        "--newline",  # Progress on new lines
        "--no-overwrites",  # Skip already downloaded
        normalized,
    ]

    console.print(f" [dim]Downloading {broadcast_id}...[/dim]")
    if verbose:
        console.print(f" [dim]$ {escape(' '.join(cmd))}[/dim]")

    try:
        # Stream output live — no timeout, broadcasts can be very long.
        # errors="replace" keeps odd bytes in titles from aborting the read loop.
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            errors="replace",
        )
    except FileNotFoundError:
        return DownloadResult(
            url=normalized,
            success=False,
            error="yt-dlp not found. Install it with: brew install yt-dlp",
        )

    # Start rotation extraction in parallel with the yt-dlp download.
    # The worker reports through this list: ("ok", path) or ("error", exc).
    rotation_result: list = []

    def _run_sidecar():
        try:
            sc = extract_rotation_sidecar(normalized, output_dir, browser=browser)
            rotation_result.append(("ok", str(sc)))
        except Exception as e:
            rotation_result.append(("error", e))

    rotation_thread = threading.Thread(target=_run_sidecar, daemon=True)
    rotation_thread.start()

    # Only the last three lines are ever used (for the error message), so do
    # not retain hours of progress output.
    tail = deque(maxlen=3)
    output_file = None

    # The context manager closes the pipe and waits for yt-dlp to exit.
    with process:
        for raw_line in process.stdout:
            line = raw_line.rstrip()
            tail.append(line)

            # Always show progress/status lines. The text is escaped because
            # Rich would otherwise treat "[download]" as a markup tag.
            if verbose or "[download]" in line or "[Merger]" in line or "already" in line.lower():
                console.print(f" [dim]{escape(line)}[/dim]")

            # Try to capture the output filename; a Merger line (final file)
            # arrives after the per-format Destination lines and overrides them.
            if "Merging formats into" in line and '"' in line:
                output_file = line[line.index('"') + 1:line.rindex('"')]
            elif "Destination:" in line:
                output_file = line.split("Destination:")[-1].strip()

    if process.returncode != 0:
        # Extract error from the tail of the output.
        error_msg = "\n".join(tail) or f"yt-dlp exited with code {process.returncode}"
        return DownloadResult(
            url=normalized,
            success=False,
            error=error_msg,
        )

    sidecar_file = None
    warning = None
    rotation_thread.join(timeout=300)
    if rotation_result:
        kind, value = rotation_result[0]
        if kind == "error":
            warning = f"Rotation sidecar failed: {value}"
            console.print(f" [yellow]{escape(warning)}[/yellow]")
        else:
            sidecar_file = value
            console.print(f" [green]Rotation sidecar[/green] → {escape(sidecar_file)}")
    elif rotation_thread.is_alive():
        # The worker outlived the join timeout: say so instead of silently
        # skipping rotation handling.
        warning = "Rotation sidecar extraction timed out after 300s; rotation not applied"
        console.print(f" [yellow]{warning}[/yellow]")

    rotation_applied = False
    if sidecar_file and output_file and Path(output_file).exists():
        try:
            _apply_rotation(output_file, sidecar_file)
            rotation_applied = True
            console.print(f" [green]Rotation corrected[/green] → {escape(output_file)}")
        except subprocess.CalledProcessError as e:
            err = e.stderr.decode("utf-8", "replace") if isinstance(e.stderr, bytes) else (e.stderr or "")
            w = f"Rotation correction failed (exit {e.returncode}): {err[:400]}"
            warning = f"{warning}; {w}" if warning else w
            console.print(f" [yellow]{escape(w)}[/yellow]")
        except Exception as e:
            # Best effort: the download itself succeeded, so only warn.
            w = f"Rotation correction failed: {e!r}"
            warning = f"{warning}; {w}" if warning else w
            console.print(f" [yellow]{escape(w)}[/yellow]")

    return DownloadResult(
        url=normalized,
        success=True,
        output_file=output_file,
        rotation_sidecar_file=sidecar_file,
        rotation_applied=rotation_applied,
        warning=warning,
    )
205
+
206
+
207
def download_all(
    urls: list[str] | None = None,
    from_file: Path | str | None = None,
    output_dir: Path = DEFAULT_VIDEOS_DIR,
    browser: str = DEFAULT_BROWSER,
    verbose: bool = False,
    parallel: int = 1,
) -> list[DownloadResult]:
    """
    Download multiple broadcast videos.

    URLs from ``urls`` and ``from_file`` are combined, normalized and
    de-duplicated (first occurrence wins). URLs that are not valid broadcast
    URLs are not silently dropped: each one is reported and returned as a
    failed DownloadResult after the results of the real downloads.

    Args:
        urls: List of broadcast URLs
        from_file: Path to JSON file containing broadcast data
        output_dir: Directory to save videos
        browser: Browser to extract cookies from
        verbose: Show yt-dlp output
        parallel: Number of concurrent downloads (default: 1 = sequential;
            values below 1 are treated as sequential)

    Returns:
        List of DownloadResult objects; empty when there were no URLs at all.
    """
    # Function-scope import; escapes yt-dlp error text so Rich does not treat
    # fragments such as "[twitter:broadcast]" as markup tags.
    from rich.markup import escape

    all_urls = list(urls or [])

    # Load URLs from file if provided
    if from_file:
        all_urls.extend(_load_urls_from_file(Path(from_file)))

    if not all_urls:
        console.print("[yellow]No broadcast URLs to download.[/yellow]")
        return []

    # Deduplicate while preserving order; collect invalid URLs as failures.
    seen = set()
    unique_urls = []
    rejected = []
    for url in all_urls:
        normalized = normalize_broadcast_url(url)
        if not normalized:
            console.print(f"[yellow]Skipping invalid broadcast URL: {escape(str(url))}[/yellow]")
            rejected.append(
                DownloadResult(url=url, success=False, error=f"Not a valid broadcast URL: {url}")
            )
        elif normalized not in seen:
            seen.add(normalized)
            unique_urls.append(normalized)

    total = len(unique_urls)
    console.print(
        f"\n[bold]Downloading {total} broadcast(s)"
        + (f" ({parallel} parallel)" if parallel > 1 else "")
        + "...[/bold]\n"
    )

    if parallel <= 1:
        # Sequential download
        results = []
        for i, url in enumerate(unique_urls, 1):
            # "\[" keeps the progress counter literal instead of a markup tag.
            console.print(f"[bold]\\[{i}/{total}][/bold] {url}")
            result = download_broadcast(url, output_dir=output_dir, browser=browser, verbose=verbose)
            results.append(result)
            if result.success:
                console.print(
                    " [green]✓ Done[/green]"
                    + (f" → {escape(result.output_file)}" if result.output_file else "")
                )
            else:
                console.print(f" [red]✗ Failed: {escape(str(result.error))}[/red]")
            console.print()
    else:
        # Parallel download; results are stored by submission index so the
        # returned list keeps the input order.
        results = [None] * total
        completed_count = 0

        with ThreadPoolExecutor(max_workers=parallel) as executor:
            futures = {
                executor.submit(
                    download_broadcast, url, output_dir=output_dir, browser=browser, verbose=verbose
                ): idx
                for idx, url in enumerate(unique_urls)
            }

            # Completion is handled on the calling thread only, so the counter
            # needs no lock.
            for future in as_completed(futures):
                idx = futures[future]
                result = future.result()
                results[idx] = result
                bid = extract_broadcast_id(unique_urls[idx])

                completed_count += 1
                if result.success:
                    console.print(
                        f" [green]✓[/green] \\[{completed_count}/{total}] {bid}"
                        + (f" → {escape(result.output_file)}" if result.output_file else "")
                    )
                else:
                    console.print(
                        f" [red]✗[/red] \\[{completed_count}/{total}] {bid}: {escape(str(result.error))}"
                    )

        console.print()

    # Invalid URLs count as failures in both the summary and the return value.
    results.extend(rejected)

    # Summary
    succeeded = sum(1 for r in results if r and r.success)
    failed = sum(1 for r in results if r and not r.success)
    console.print(f"[bold]Done:[/bold] {succeeded} succeeded, {failed} failed")

    return results
300
+
301
+
302
def _load_urls_from_file(path: Path) -> list[str]:
    """
    Load broadcast URLs from a JSON file.

    Supports multiple formats:
    1. Plain list of URLs: ["https://x.com/i/broadcasts/abc", ...]
    2. Scanner output: {"broadcasts": [{"url": "..."}, ...]}
    3. List of objects: [{"url": "..."}, ...]

    Entries of any other shape (including non-string "url" values) are
    ignored. A missing file or invalid JSON is reported on the console and
    yields an empty list.

    Returns:
        The URLs from the file that are recognised as broadcast URLs, in
        file order.
    """
    # is_file() also rejects directories, which exists() would let through
    # only to fail on read.
    if not path.is_file():
        console.print(f"[red]File not found: {path}[/red]")
        return []

    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        data = json.loads(path.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        console.print(f"[red]Invalid JSON in {path}: {e}[/red]")
        return []

    # Both supported container shapes boil down to one list of entries.
    entries = data.get("broadcasts") if isinstance(data, dict) else data
    if not isinstance(entries, list):
        entries = []

    urls = []
    for item in entries:
        candidate = item.get("url") if isinstance(item, dict) else item
        # Only strings can be matched against the URL patterns.
        if isinstance(candidate, str):
            urls.append(candidate)

    valid = [u for u in urls if is_broadcast_url(u)]
    console.print(f" [dim]Loaded {len(valid)} broadcast URL(s) from {path}[/dim]")
    return valid