broadcastx 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- broadcastx/__init__.py +3 -0
- broadcastx/cli.py +244 -0
- broadcastx/config.py +67 -0
- broadcastx/downloader.py +337 -0
- broadcastx/monitor.py +424 -0
- broadcastx/pause_detector.py +382 -0
- broadcastx/rotation.py +564 -0
- broadcastx/scanner.py +290 -0
- broadcastx/scrape_broadcasts.py +909 -0
- broadcastx-0.1.0.dist-info/METADATA +10 -0
- broadcastx-0.1.0.dist-info/RECORD +14 -0
- broadcastx-0.1.0.dist-info/WHEEL +5 -0
- broadcastx-0.1.0.dist-info/entry_points.txt +2 -0
- broadcastx-0.1.0.dist-info/top_level.txt +1 -0
broadcastx/__init__.py
ADDED
broadcastx/cli.py
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
"""
|
|
2
|
+
BroadcastX CLI — Discover and download X/Twitter broadcast videos.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
broadcastx scan @username
|
|
6
|
+
broadcastx download https://x.com/i/broadcasts/...
|
|
7
|
+
broadcastx download --from broadcasts.json
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
import click
|
|
14
|
+
from rich.console import Console
|
|
15
|
+
|
|
16
|
+
from . import __version__
|
|
17
|
+
from .config import DEFAULT_BROADCASTS_FILE, DEFAULT_BROWSER, DEFAULT_VIDEOS_DIR
|
|
18
|
+
from .downloader import check_ffmpeg, check_yt_dlp, download_all, download_broadcast
|
|
19
|
+
from .monitor import monitor_user
|
|
20
|
+
from .pause_detector import detect_pauses, pause_report, trim_intervals
|
|
21
|
+
from .scanner import scan_user
|
|
22
|
+
from .scrape_broadcasts import scrape_broadcasts
|
|
23
|
+
|
|
24
|
+
console = Console()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@click.group()
@click.version_option(version=__version__, prog_name="broadcastx")
def main():
    """BroadcastX — Discover and download X/Twitter broadcast videos."""
    # Root command group: it does no work of its own. Sub-commands attach
    # themselves through ``@main.command()``; the docstring above is what
    # click shows as the top-level ``--help`` text.
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@main.command()
@click.argument("username")
@click.option("--max-scrolls", "-n", default=100, help="Maximum scroll actions (default: 100)")
@click.option("--scroll-delay", "-d", default=2.0, help="Delay between scrolls in seconds (default: 2.0)")
@click.option("--idle-timeout", "-t", default=10.0, help="Stop after N seconds with no new data (default: 10)")
@click.option("--output", "-o", default=None, help="Output JSON file path")
@click.option("--headless/--no-headless", default=False, help="Run browser headless (default: visible)")
def scan(username, max_scrolls, scroll_delay, idle_timeout, output, headless):
    """Scan a user's timeline for broadcast links.

    USERNAME can be with or without @ (e.g., @elonmusk or elonmusk).
    """
    # The docstring doubles as the command's --help text, so implementation
    # notes are kept in comments.
    scan_options = {
        "username": username,
        "max_scrolls": max_scrolls,
        "scroll_delay": scroll_delay,
        "idle_timeout": idle_timeout,
        "headless": headless,
        # The CLI flag is --output; the scanner's keyword is output_file.
        "output_file": output,
    }
    # scan_user(...) produces the awaitable that asyncio.run drives to completion.
    asyncio.run(scan_user(**scan_options))
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@main.command()
@click.argument("username")
@click.option("--output", "-o", default=None, help="Output JSON file path")
@click.option("--delay", default=1.0, help="Delay between API calls in seconds (default: 1.0)")
@click.option("--headless/--no-headless", default=False, help="Run browser headless (default: visible)")
@click.option("--verbose", "-v", is_flag=True, help="Show detailed output")
@click.option("--fresh", is_flag=True, help="Ignore saved state, start from beginning")
@click.option("--auth-token", default=None, help="Manual auth_token cookie (skips browser)")
@click.option("--csrf-token", default=None, help="Manual ct0/CSRF token (skips browser)")
@click.option("--user-id", default=None, help="Manual user ID (skips user ID lookup)")
def scrape(username, output, delay, headless, verbose, fresh, auth_token, csrf_token, user_id):
    """Scrape ALL past broadcasts from a user's timeline.

    Uses GraphQL API pagination. Saves cursor state so you can resume
    after rate limits. Run the same command again to continue.

    USERNAME can be with or without @ (e.g., @SpaceX or SpaceX).

    Examples:

      broadcastx scrape @SpaceX

      broadcastx scrape @SpaceX --fresh # ignore saved state

      broadcastx scrape @SpaceX --delay 2.0 -v
    """
    if fresh:
        # --fresh discards the saved resume state before scraping starts.
        # The state path helper is private to scrape_broadcasts, hence the
        # function-local import.
        from .scrape_broadcasts import _state_file

        bare_name = username.lstrip("@")
        saved_state = _state_file(bare_name)
        if saved_state.exists():
            saved_state.unlink()
            console.print(f"[dim]Cleared saved state: {saved_state}[/dim]")

    scrape_options = {
        "username": username,
        "headless": headless,
        # The CLI flag is --output; the scraper's keyword is output_file.
        "output_file": output,
        "delay": delay,
        "verbose": verbose,
        "auth_token": auth_token,
        "csrf_token": csrf_token,
        "user_id": user_id,
    }
    asyncio.run(scrape_broadcasts(**scrape_options))
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@main.command()
@click.argument("username")
@click.option("--check-interval", default=30 * 60, help="Seconds between profile checks (default: 1800)")
@click.option("--live-interval", default=5 * 60, help="Seconds between live-status checks (default: 300)")
@click.option("--output", "-o", default=None, help="Monitor event JSON file path")
@click.option("--output-dir", default=None, help="Directory for downloaded videos")
@click.option("--browser", "-b", default=DEFAULT_BROWSER, help=f"Browser for yt-dlp cookies (default: {DEFAULT_BROWSER})")
@click.option("--headless/--no-headless", default=False, help="Run browser headless (default: visible)")
@click.option("--download/--no-download", default=True, help="Download when broadcast ends (default: download)")
@click.option("--once", is_flag=True, help="Run one detection cycle, useful for testing")
def monitor(username, check_interval, live_interval, output, output_dir, browser, headless, download, once):
    """Monitor a profile for current live broadcasts and download ended replays.

    USERNAME can be with or without @ (e.g., @SpaceX or SpaceX).
    """
    # The external tools are only required when ended broadcasts will be
    # downloaded; with --no-download the checks are skipped entirely.
    if download:
        preflight = (
            (check_yt_dlp, "[red]✗ yt-dlp not found.[/red]", " Install with: [bold]brew install yt-dlp[/bold]"),
            (check_ffmpeg, "[red]✗ ffmpeg not found.[/red]", " Install with: [bold]brew install ffmpeg[/bold]"),
        )
        # Checked in order; the first missing tool aborts with exit status 1.
        for is_available, headline, install_hint in preflight:
            if is_available():
                continue
            console.print(headline)
            console.print(install_hint)
            raise SystemExit(1)

    monitor_options = {
        "username": username,
        "check_interval": check_interval,
        "live_interval": live_interval,
        "headless": headless,
        # The CLI flag is --output; the monitor's keyword is output_file.
        "output_file": output,
        "output_dir": output_dir,
        "browser": browser,
        "download": download,
        "once": once,
    }
    asyncio.run(monitor_user(**monitor_options))
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@main.command()
@click.argument("urls", nargs=-1)
@click.option("--from", "from_file", default=None, type=click.Path(), help="Load URLs from a JSON file")
@click.option("--output-dir", "-o", default=None, help="Output directory for videos")
@click.option("--browser", "-b", default=DEFAULT_BROWSER, help=f"Browser for cookies (default: {DEFAULT_BROWSER})")
@click.option("--verbose", "-v", is_flag=True, help="Show yt-dlp output")
@click.option("--parallel", "-p", default=1, help="Number of concurrent downloads (default: 1)")
def download(urls, from_file, output_dir, browser, verbose, parallel):
    """Download broadcast video(s).

    Pass one or more broadcast URLs directly, or use --from to load from a JSON file.

    Examples:

      broadcastx download https://x.com/i/broadcasts/1vAxRkBbDRzKl

      broadcastx download --from output/broadcasts.json

      broadcastx download --from output/broadcasts.json -o ./my_videos

    Rotation correction is applied automatically: if the broadcast carries
    phone-orientation metadata, the downloaded video is re-encoded so it
    displays upright. A `.rotation.jsonl` sidecar is also written alongside
    the video for inspection.
    """
    # Pre-flight: both external tools must be on PATH before any work starts.
    preflight = (
        (check_yt_dlp, "[red]✗ yt-dlp not found.[/red]", " Install with: [bold]brew install yt-dlp[/bold]"),
        (check_ffmpeg, "[red]✗ ffmpeg not found.[/red]", " Install with: [bold]brew install ffmpeg[/bold]"),
    )
    for is_available, headline, install_hint in preflight:
        if is_available():
            continue
        console.print(headline)
        console.print(install_hint)
        raise SystemExit(1)

    # At least one source of URLs is required.
    if not (urls or from_file):
        console.print("[yellow]Provide URLs or use --from <file>.[/yellow]")
        raise SystemExit(1)

    if output_dir:
        target_dir = Path(output_dir)
    else:
        target_dir = DEFAULT_VIDEOS_DIR

    outcomes = download_all(
        urls=list(urls),
        from_file=from_file,
        output_dir=target_dir,
        browser=browser,
        verbose=verbose,
        parallel=parallel,
    )

    # Non-zero exit status as soon as a single download did not succeed.
    if not all(outcome.success for outcome in outcomes):
        raise SystemExit(1)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _find_downloaded_video(broadcast_url):
    """Locate the downloaded .mp4 that belongs to *broadcast_url*.

    Files written by the download command are named
    ``<id> [<timestamp>] <title>.mp4``, so an exact ``<id>.mp4`` lookup alone
    never finds them. The exact name is still tried first; after that the
    default videos directory is searched for a file whose name starts with
    the broadcast ID followed by a space.

    Returns the matching path, or the ``<id>.mp4`` path when nothing matches
    so the caller can report a meaningful "not found" location.
    """
    from .config import extract_broadcast_id

    # Fall back to the last path segment for URLs the patterns do not recognise.
    video_id = extract_broadcast_id(broadcast_url) or broadcast_url.rstrip("/").split("/")[-1]
    exact_path = DEFAULT_VIDEOS_DIR / f"{video_id}.mp4"
    if exact_path.exists() or not DEFAULT_VIDEOS_DIR.is_dir():
        return exact_path

    candidates = sorted(
        entry
        for entry in DEFAULT_VIDEOS_DIR.iterdir()
        if entry.is_file()
        and entry.name.startswith(f"{video_id} ")
        and entry.suffix == ".mp4"
        # Never pick up the output of an earlier --trim run.
        and not entry.name.endswith(".trimmed.mp4")
    )
    return candidates[0] if candidates else exact_path


@main.command()
@click.argument("broadcast_url")
@click.option("--browser", "-b", default=DEFAULT_BROWSER, help=f"Browser for cookies (default: {DEFAULT_BROWSER})")
@click.option("--trim/--detect-only", default=False, help="Actually trim paused sections (default: detect only)")
@click.option("--output", "-o", default=None, help="Output video for --trim (default: <video>.trimmed.mp4)")
@click.option("--size-ratio", default=0.50, help="Size-drop threshold (default 0.50)")
@click.option("--gap-density", default=0.50, help="PDT-gap density threshold (default 0.50)")
@click.option("--min-pause", default=10.0, help="Minimum pause duration in seconds (default 10)")
def trim_pauses(broadcast_url, browser, trim, output, size_ratio, gap_density, min_pause):
    """Detect (and optionally trim) paused sections in a broadcast.

    Analyses HLS segments via HTTP HEAD requests (no full download) and
    playlist PDT timestamps to find sections where the video was paused
    while audio continued. Default: detect-only. Pass --trim to cut.
    """
    # Paths and exception text may contain square brackets (video titles,
    # ffmpeg messages); escape them so Rich does not parse them as markup.
    from rich.markup import escape

    # ffmpeg is only needed for the actual cut, not for detection.
    if trim and not check_ffmpeg():
        console.print("[red]ffmpeg not found - install with: brew install ffmpeg[/red]")
        raise SystemExit(1)

    console.print("[bold]Analysing HLS segments for pauses...[/bold]")

    pauses = detect_pauses(
        broadcast_url,
        browser=browser,
        size_ratio_threshold=size_ratio,
        gap_density_threshold=gap_density,
        min_pause_sec=min_pause,
    )

    console.print(pause_report(pauses))

    if not trim:
        return
    if not pauses:
        console.print("[green]Nothing to trim.")
        return

    video_path = _find_downloaded_video(broadcast_url)
    if not video_path.exists():
        console.print(f"[red]Video not found: {escape(str(video_path))}")
        console.print(" Download first: broadcastx download <url>")
        raise SystemExit(1)

    # Only the final suffix is rewritten; a ".mp4" elsewhere in the path is
    # left untouched.
    out = Path(output) if output else video_path.with_name(f"{video_path.stem}.trimmed.mp4")
    console.print(f"\n[bold]Trimming -> {escape(str(out))}...")
    try:
        trim_intervals(video_path, pauses, out)
        console.print(f" [green]Done -> {escape(str(out))}")
    except Exception as e:
        console.print(f" [red]Failed: {escape(str(e))}")
        raise SystemExit(1)


# Keep the entry-point guard last: every @main.command() above must already be
# registered when main() starts, otherwise the command is missing under
# ``python -m broadcastx.cli``.
if __name__ == "__main__":
    main()
|
broadcastx/config.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Shared configuration and constants for BroadcastX."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
# Default output locations. The paths are relative, so they resolve against
# the current working directory of the process.
DEFAULT_OUTPUT_DIR = Path("output")
DEFAULT_VIDEOS_DIR = DEFAULT_OUTPUT_DIR / "videos"
DEFAULT_BROADCASTS_FILE = DEFAULT_OUTPUT_DIR / "broadcasts.json"

# Browser to extract cookies from (for yt-dlp)
DEFAULT_BROWSER = "chrome"

# X broadcast IDs observed in real broadcast URLs are opaque alphanumeric
# tokens, e.g. 1vAxRkBbDRzKl. Requiring at least 8 characters rejects tiny
# fragments such as /broadcasts/1.
BROADCAST_ID_RE = r"[A-Za-z0-9]{8,}"

# Broadcast URL patterns to match. Group 1 captures the broadcast ID; the
# trailing negative lookahead refuses a match whose ID is immediately followed
# by another word character (letter, digit or underscore).
BROADCAST_PATTERNS = [
    re.compile(rf"https?://(?:x|twitter)\.com/i/broadcasts/({BROADCAST_ID_RE})(?![A-Za-z0-9_])"),
    re.compile(rf"https?://(?:www\.)?pscp\.tv/w/({BROADCAST_ID_RE})(?![A-Za-z0-9_])"),
]

# Twitter GraphQL endpoints to intercept
GRAPHQL_ENDPOINTS = [
    "UserTweets",
    "UserTweetsAndReplies",
    "TweetDetail",
    "SearchTimeline",
]

# Scanner defaults
# NOTE(review): the `scan` command in cli.py repeats these literals in its
# option defaults instead of importing them; keep the two in sync.
DEFAULT_MAX_SCROLLS = 100  # Maximum number of scroll actions
DEFAULT_SCROLL_DELAY = 2.0  # Seconds between scrolls
DEFAULT_IDLE_TIMEOUT = 10.0  # Stop after N seconds with no new tweets
DEFAULT_HEADLESS = False  # Show browser by default (useful for login)

# yt-dlp output template. The filename is the broadcast ID, then the
# `timestamp` field formatted as "YYYY-MM-DD HH.MM.SS" in square brackets,
# then the title and the container extension, so files are NOT named
# "<id>.<ext>".
YTDLP_OUTPUT_TEMPLATE = "%(id)s [%(timestamp>%Y-%m-%d %H.%M.%S)s] %(title)s.%(ext)s"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def ensure_output_dirs():
    """Make sure the default output directory and its videos sub-directory exist.

    Missing parents are created and directories that are already present are
    left untouched.
    """
    for directory in (DEFAULT_OUTPUT_DIR, DEFAULT_VIDEOS_DIR):
        directory.mkdir(parents=True, exist_ok=True)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def extract_broadcast_id(url: str) -> str | None:
    """Return the broadcast ID found in *url*, or ``None`` when no pattern matches.

    The patterns in ``BROADCAST_PATTERNS`` are tried in order and the first
    one that matches anywhere in the string wins.
    """
    # Lazy generator: later patterns are only searched if earlier ones miss.
    attempts = (pattern.search(url) for pattern in BROADCAST_PATTERNS)
    first_hit = next((hit for hit in attempts if hit), None)
    return first_hit.group(1) if first_hit else None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def is_broadcast_url(url: str) -> bool:
    """Tell whether a broadcast ID can be extracted from *url*."""
    broadcast_id = extract_broadcast_id(url)
    return broadcast_id is not None
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def normalize_broadcast_url(url: str) -> str | None:
    """Rewrite *url* into the canonical ``https://x.com/i/broadcasts/<id>`` form.

    Returns ``None`` when no broadcast ID can be extracted from *url*.
    """
    broadcast_id = extract_broadcast_id(url)
    if not broadcast_id:
        return None
    return f"https://x.com/i/broadcasts/{broadcast_id}"
|
broadcastx/downloader.py
ADDED
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Downloader module — Download broadcast videos using yt-dlp.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
from broadcastx.downloader import download_broadcast, download_all
|
|
6
|
+
|
|
7
|
+
# Single download
|
|
8
|
+
download_broadcast("https://x.com/i/broadcasts/1vAxRkBbDRzKl")
|
|
9
|
+
|
|
10
|
+
# Batch download from JSON file
|
|
11
|
+
download_all(from_file="broadcasts.json", output_dir="./videos")
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import shutil
|
|
16
|
+
import subprocess
|
|
17
|
+
import threading
|
|
18
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
|
|
22
|
+
from rich.console import Console
|
|
23
|
+
|
|
24
|
+
from .config import (
|
|
25
|
+
DEFAULT_BROWSER,
|
|
26
|
+
DEFAULT_VIDEOS_DIR,
|
|
27
|
+
YTDLP_OUTPUT_TEMPLATE,
|
|
28
|
+
extract_broadcast_id,
|
|
29
|
+
is_broadcast_url,
|
|
30
|
+
normalize_broadcast_url,
|
|
31
|
+
)
|
|
32
|
+
from .rotation import extract_rotation_sidecar
|
|
33
|
+
from .rotation import rotate_video as _apply_rotation
|
|
34
|
+
|
|
35
|
+
console = Console()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
|
|
39
|
+
class DownloadResult:
|
|
40
|
+
"""Result of a single broadcast download attempt."""
|
|
41
|
+
url: str
|
|
42
|
+
success: bool
|
|
43
|
+
output_file: str | None = None
|
|
44
|
+
rotation_sidecar_file: str | None = None
|
|
45
|
+
rotation_applied: bool = False
|
|
46
|
+
error: str | None = None
|
|
47
|
+
warning: str | None = None
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def check_yt_dlp() -> bool:
    """Report whether a ``yt-dlp`` executable can be resolved via ``shutil.which``."""
    resolved = shutil.which("yt-dlp")
    return resolved is not None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def check_ffmpeg() -> bool:
    """Report whether an ``ffmpeg`` executable can be resolved via ``shutil.which``."""
    resolved = shutil.which("ffmpeg")
    return resolved is not None
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def download_broadcast(
    url: str,
    output_dir: Path = DEFAULT_VIDEOS_DIR,
    browser: str = DEFAULT_BROWSER,
    verbose: bool = False,
) -> DownloadResult:
    """
    Download a single broadcast video using yt-dlp.

    No timeout — broadcasts can be hours long. yt-dlp runs until completion
    and its progress lines are echoed to the terminal. While yt-dlp runs, the
    rotation sidecar is extracted on a background thread; after a successful
    download the sidecar (if one was produced) is used to rotate the video.

    Args:
        url: Broadcast URL (x.com/i/broadcasts/... or pscp.tv/w/...)
        output_dir: Directory to save the video
        browser: Browser to extract cookies from
        verbose: Show yt-dlp output

    Returns:
        DownloadResult with success status and output file path. Sidecar and
        rotation problems never fail the download; they are reported through
        ``DownloadResult.warning``.
    """
    # yt-dlp/ffmpeg output contains bracketed tokens such as "[download]" that
    # Rich would otherwise parse as markup tags (dropping them, or raising on a
    # stray closing tag). Escape external text before printing it.
    from rich.markup import escape

    normalized = normalize_broadcast_url(url)
    if not normalized:
        return DownloadResult(
            url=url,
            success=False,
            error=f"Not a valid broadcast URL: {url}",
        )

    broadcast_id = extract_broadcast_id(url)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    output_template = str(output_dir / YTDLP_OUTPUT_TEMPLATE)

    cmd = [
        "yt-dlp",
        "--cookies-from-browser", browser,
        "-f", "bestvideo+bestaudio/best",
        "--output", output_template,
        "--merge-output-format", "mp4",
        "--no-warnings",
        "--newline",  # Progress on new lines
        "--no-overwrites",  # Skip already downloaded
        normalized,
    ]

    console.print(f" [dim]Downloading {broadcast_id}...[/dim]")
    if verbose:
        console.print(f" [dim]$ {escape(' '.join(cmd))}[/dim]")

    # Rotation extraction runs in parallel with the yt-dlp download; both read
    # from the CDN independently, so there is no need to wait for one first.
    sidecar_timeout_sec = 300
    rotation_result: list = []

    def _run_sidecar():
        # Record either ("ok", path) or ("error", exception); never raise out
        # of the thread.
        try:
            sc = extract_rotation_sidecar(normalized, output_dir, browser=browser)
            rotation_result.append(("ok", str(sc)))
        except Exception as e:
            rotation_result.append(("error", e))

    rotation_thread = threading.Thread(target=_run_sidecar, daemon=True)

    # Only launching the process can mean "yt-dlp is missing"; keep the try
    # narrow so unrelated FileNotFoundErrors are not misreported.
    try:
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )
    except FileNotFoundError:
        return DownloadResult(
            url=normalized,
            success=False,
            error="yt-dlp not found. Install it with: brew install yt-dlp",
        )

    output_lines: list[str] = []
    output_file = None

    # The context manager closes the stdout pipe and waits for the process,
    # even if reading the output is interrupted.
    with process:
        rotation_thread.start()

        for line in process.stdout:
            line = line.rstrip()
            output_lines.append(line)

            # Always show progress/status lines
            if verbose or "[download]" in line or "[Merger]" in line or "already" in line.lower():
                console.print(f" [dim]{escape(line)}[/dim]")

            # Try to capture the output filename. A later "Merging formats"
            # line overrides earlier per-stream "Destination:" lines.
            if "Merging formats into" in line and '"' in line:
                start = line.index('"') + 1
                end = line.rindex('"')
                output_file = line[start:end]
            elif "Destination:" in line:
                output_file = line.split("Destination:")[-1].strip()

        process.wait()

    if process.returncode != 0:
        # Extract error from output
        error_msg = "\n".join(output_lines[-3:]) or f"yt-dlp exited with code {process.returncode}"
        return DownloadResult(
            url=normalized,
            success=False,
            error=error_msg,
        )

    sidecar_file = None
    warning = None
    rotation_thread.join(timeout=sidecar_timeout_sec)
    if rotation_thread.is_alive():
        # Previously a timed-out extraction was dropped without any trace.
        warning = f"Rotation sidecar not ready after {sidecar_timeout_sec}s; rotation skipped"
        console.print(f" [yellow]{warning}[/yellow]")
    elif rotation_result:
        kind, value = rotation_result[0]
        if kind == "error":
            warning = f"Rotation sidecar failed: {value}"
            console.print(f" [yellow]{escape(warning)}[/yellow]")
        else:
            sidecar_file = value
            console.print(f" [green]Rotation sidecar[/green] → {escape(sidecar_file)}")

    rotation_applied = False
    if sidecar_file and output_file and Path(output_file).exists():
        try:
            _apply_rotation(output_file, sidecar_file)
            # rotate_video returns the same path when it replaces in-place
            rotation_applied = True
            console.print(f" [green]Rotation corrected[/green] → {escape(output_file)}")
        except subprocess.CalledProcessError as e:
            err = (e.stderr.decode("utf-8", "replace") if isinstance(e.stderr, bytes) else (e.stderr or ""))
            w = f"Rotation correction failed (exit {e.returncode}): {err[:400]}"
            warning = f"{warning}; {w}" if warning else w
            console.print(f" [yellow]{escape(w)}[/yellow]")
        except Exception as e:
            w = f"Rotation correction failed: {e!r}"
            warning = f"{warning}; {w}" if warning else w
            console.print(f" [yellow]{escape(w)}[/yellow]")

    return DownloadResult(
        url=normalized,
        success=True,
        output_file=output_file,
        rotation_sidecar_file=sidecar_file,
        rotation_applied=rotation_applied,
        warning=warning,
    )
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def download_all(
    urls: list[str] | None = None,
    from_file: Path | str | None = None,
    output_dir: Path = DEFAULT_VIDEOS_DIR,
    browser: str = DEFAULT_BROWSER,
    verbose: bool = False,
    parallel: int = 1,
) -> list[DownloadResult]:
    """
    Download multiple broadcast videos.

    URLs given directly and URLs loaded from ``from_file`` are combined,
    normalized and de-duplicated (first occurrence wins). Entries that are not
    broadcast URLs are reported and skipped.

    Args:
        urls: List of broadcast URLs
        from_file: Path to JSON file containing broadcast data
        output_dir: Directory to save videos
        browser: Browser to extract cookies from
        verbose: Show yt-dlp output
        parallel: Number of concurrent downloads (default: 1 = sequential)

    Returns:
        List of DownloadResult objects, in the order of the de-duplicated
        URLs; empty when there was nothing valid to download.
    """
    # Error text and file names come from yt-dlp / the filesystem and may
    # contain square brackets that Rich would parse as markup; escape them.
    from rich.markup import escape

    all_urls = list(urls or [])

    # Load URLs from file if provided
    if from_file:
        all_urls.extend(_load_urls_from_file(Path(from_file)))

    if not all_urls:
        console.print("[yellow]No broadcast URLs to download.[/yellow]")
        return []

    # Deduplicate while preserving order
    seen: set[str] = set()
    unique_urls: list[str] = []
    for url in all_urls:
        normalized = normalize_broadcast_url(url)
        if not normalized:
            # Previously dropped without a trace; tell the user what was ignored.
            console.print(f"[yellow]Skipping invalid broadcast URL: {escape(str(url))}[/yellow]")
            continue
        if normalized not in seen:
            seen.add(normalized)
            unique_urls.append(normalized)

    total = len(unique_urls)
    if not total:
        console.print("[yellow]No valid broadcast URLs to download.[/yellow]")
        return []

    console.print(f"\n[bold]Downloading {total} broadcast(s)" + (f" ({parallel} parallel)" if parallel > 1 else "") + "...[/bold]\n")

    results: list[DownloadResult | None]
    if parallel <= 1:
        # Sequential download
        results = []
        for i, url in enumerate(unique_urls, 1):
            console.print(f"[bold][{i}/{total}][/bold] {url}")
            result = download_broadcast(url, output_dir=output_dir, browser=browser, verbose=verbose)
            results.append(result)
            if result.success:
                console.print(" [green]✓ Done[/green]" + (f" → {escape(result.output_file)}" if result.output_file else ""))
            else:
                console.print(f" [red]✗ Failed: {escape(str(result.error))}[/red]")
            console.print()
    else:
        # Parallel download; slots are filled by index so the returned order
        # matches unique_urls regardless of completion order.
        results = [None] * total
        completed_count = 0

        def _download_one(index: int, url: str) -> tuple[int, DownloadResult]:
            return index, download_broadcast(url, output_dir=output_dir, browser=browser, verbose=verbose)

        with ThreadPoolExecutor(max_workers=parallel) as executor:
            futures = [
                executor.submit(_download_one, i, url)
                for i, url in enumerate(unique_urls)
            ]

            # Results are consumed in this thread only, so the progress
            # counter needs no lock.
            for future in as_completed(futures):
                idx, result = future.result()
                results[idx] = result
                bid = extract_broadcast_id(unique_urls[idx])

                completed_count += 1
                if result.success:
                    console.print(f" [green]✓[/green] [{completed_count}/{total}] {bid}" + (f" → {escape(result.output_file)}" if result.output_file else ""))
                else:
                    console.print(f" [red]✗[/red] [{completed_count}/{total}] {bid}: {escape(str(result.error))}")

        console.print()

    # Summary
    succeeded = sum(1 for r in results if r and r.success)
    failed = sum(1 for r in results if r and not r.success)
    console.print(f"[bold]Done:[/bold] {succeeded} succeeded, {failed} failed")

    return results
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def _load_urls_from_file(path: Path) -> list[str]:
    """
    Load broadcast URLs from a JSON file.

    Supports multiple formats:
    1. Plain list of URLs: ["https://x.com/i/broadcasts/abc", ...]
    2. Scanner output: {"broadcasts": [{"url": "..."}, ...]}
    3. List of objects: [{"url": "..."}, ...]

    Entries that are not strings (or objects without a string "url") are
    ignored, as are URLs that do not look like broadcast URLs.

    Args:
        path: Location of the JSON file.

    Returns:
        The broadcast URLs found in the file, in file order. An empty list is
        returned (after printing the reason) when the file is missing,
        unreadable or not valid JSON.
    """
    if not path.exists():
        console.print(f"[red]File not found: {path}[/red]")
        return []

    try:
        # Decode explicitly instead of relying on the platform default;
        # "utf-8-sig" also tolerates a leading byte-order mark.
        data = json.loads(path.read_text(encoding="utf-8-sig"))
    except json.JSONDecodeError as e:
        console.print(f"[red]Invalid JSON in {path}: {e}[/red]")
        return []
    except (OSError, UnicodeDecodeError) as e:
        console.print(f"[red]Could not read {path}: {e}[/red]")
        return []

    # Both supported layouts reduce to "a list of items".
    items = data.get("broadcasts") if isinstance(data, dict) else data
    if not isinstance(items, list):
        items = []

    urls = []
    for item in items:
        candidate = item.get("url") if isinstance(item, dict) else item
        # Non-string values (null, numbers, nested objects) would make the
        # regex search in is_broadcast_url raise, so drop them here.
        if isinstance(candidate, str):
            urls.append(candidate)

    valid = [u for u in urls if is_broadcast_url(u)]
    console.print(f" [dim]Loaded {len(valid)} broadcast URL(s) from {path}[/dim]")
    return valid
|