java2-extention 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,525 @@
1
+ """
2
+ downloader.py — freepornvideos.xxx direct MP4 + HLS downloader
3
+ - MP4/webm: system curl stream-to-disk on Termux, urllib3 elsewhere
4
+ - HLS (m3u8): segment-by-segment download + ffmpeg concat
5
+ - Resume: deterministic temp dir, skips already-downloaded segments
6
+ - 2 GB RAM safe: nothing buffered in memory
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import hashlib
11
+ import os
12
+ import platform
13
+ import re
14
+ import shutil
15
+ import subprocess
16
+ import sys
17
+ import tempfile
18
+ import threading
19
+ import time
20
+ import urllib3
21
+ from concurrent.futures import ThreadPoolExecutor, as_completed
22
+ from pathlib import Path
23
+ from typing import Optional, Callable
24
+ from urllib.parse import urljoin
25
+
26
# Suppress urllib3's InsecureRequestWarning messages for the whole process.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Progress callback type. Call sites in this module pass three positional
# values: (amount done, total or 0, speed).
ProgressCB = Callable[[int, int, float], None]

# Platform heuristic that selects the system-curl backend and the small pool.
# The `or` chain short-circuits, so sys.executable is only inspected when the
# machine/system checks are both false.
_IS_TERMUX = (
    platform.machine() in ("aarch64", "armv7l")
    or platform.system() == "Android"
    or "com.termux" in sys.executable
)

# External tools are looked up on PATH once, at import time.
_HAS_FFMPEG = bool(shutil.which("ffmpeg"))
_HAS_SYS_CURL = bool(shutil.which("curl"))

# Size used for both num_pools and maxsize of the shared urllib3 PoolManager.
_POOL_SIZE = 4 if _IS_TERMUX else 16

# User-Agent string sent with every request.
UA = " ".join((
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
))

# Values sent in the Origin and Referer request headers.
SITE_ORIGIN = "https://www.freepornvideos.xxx"
SITE_REFERER = "https://www.freepornvideos.xxx/"

# Optional log file that _slog appends to; None means stdout only.
_LOG_FILE: Optional[Path] = None
49
+
50
+
51
def _slog(msg: str) -> None:
    """Print *msg* (flushed immediately) and append it to the log file if one is set.

    Any error raised while writing the log file is ignored, so logging never
    interrupts the caller.
    """
    print(msg, flush=True)
    if not _LOG_FILE:
        return
    try:
        with open(_LOG_FILE, "a", encoding="utf-8") as log:
            log.write(msg + "\n")
    except Exception:
        # Best-effort: the message has already been printed above.
        pass
59
+
60
+
61
def set_verbose(_: bool = True) -> None:
    """Accept a verbosity flag and do nothing with it.

    The argument is ignored; the function has no effect.
    NOTE(review): presumably kept so existing callers keep working - confirm.
    """
    return None
63
+
64
+
65
+ # ── urllib3 pool ──────────────────────────────────────────────
66
+
67
# Guards creation of the shared PoolManager below.
_pool_lock = threading.Lock()
# Lazily created by _get_pool(); None until the first call.
_pool: Optional[urllib3.PoolManager] = None


def _get_pool() -> urllib3.PoolManager:
    """Return the module-wide urllib3 PoolManager, building it on first use.

    Creation happens under ``_pool_lock`` so concurrent first callers end up
    sharing a single instance.
    """
    global _pool
    with _pool_lock:
        if _pool is not None:
            return _pool
        default_timeout = urllib3.Timeout(connect=15, read=120)
        _pool = urllib3.PoolManager(
            num_pools=_POOL_SIZE,
            maxsize=_POOL_SIZE,
            headers={"User-Agent": UA},
            timeout=default_timeout,
        )
        return _pool
82
+
83
+
84
+ # ── curl helpers ──────────────────────────────────────────────
85
+
86
def _curl_get_text(url: str, referer: str = SITE_REFERER,
                   timeout: float = 30.0) -> tuple[int, str]:
    """Fetch *url* as text with the system curl binary.

    Args:
        url: Address to request (redirects are followed).
        referer: Value for the Referer header.
        timeout: Overall time budget in seconds, passed to curl's --max-time.

    Returns:
        ``(status, body)``. ``status`` is the HTTP status curl reported, or 0
        when curl produced no parsable status (for example on a connection
        error).

    Raises:
        RuntimeError: If the curl process itself does not finish within
            ``timeout + 5`` seconds.
    """
    marker = "\n__STATUS__:"
    curl_bin = shutil.which("curl") or "curl"
    # int() truncation could yield 0, which curl interprets as "no time limit".
    max_time = max(1, int(timeout))
    cmd = [
        curl_bin, "-sS", "-L",
        "--max-time", str(max_time),
        "--compressed",
        "-A", UA,
        "-H", f"Referer: {referer}",
        "-H", f"Origin: {SITE_ORIGIN}",
        "-H", "Accept: */*",
        "--write-out", marker + "%{http_code}",
        url,
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, timeout=timeout + 5)
    except subprocess.TimeoutExpired as exc:
        # Same exception type the other curl helpers in this module raise.
        raise RuntimeError("curl request timed out") from exc

    err = result.stderr.decode("utf-8", errors="replace")
    if err.strip():
        # With -sS curl only writes real errors here; surface them for debugging.
        _slog(f" [dl] curl stderr: {err[:400]}")

    raw = result.stdout.decode("utf-8", errors="replace")
    body, found, code = raw.rpartition(marker)
    if not found:
        return 0, raw
    code = code.strip()
    return (int(code) if code.isdigit() else 0), body
108
+
109
+
110
def _curl_download_to_file(url: str, referer: str, out_path: Path,
                           timeout: float = 3600) -> int:
    """Save *url* to *out_path* using system curl and return the file size in bytes.

    Raises:
        RuntimeError: If curl times out, the reported HTTP status is not 200,
            or the output file is missing or empty.
    """
    marker = "\n__STATUS__:"
    extra_headers = (
        f"Referer: {referer}",
        f"Origin: {SITE_ORIGIN}",
        "Accept: */*",
        "Accept-Encoding: identity",
        "Sec-Fetch-Dest: video",
        "Sec-Fetch-Mode: no-cors",
        "Sec-Fetch-Site: cross-site",
    )
    cmd = [
        shutil.which("curl") or "curl", "-sS", "-L",
        "--max-time", str(int(timeout)),
        "-A", UA,
    ]
    for header in extra_headers:
        cmd += ["-H", header]
    cmd += ["-o", str(out_path), "--write-out", marker + "%{http_code}", url]

    try:
        result = subprocess.run(cmd, capture_output=True, timeout=timeout + 10)
    except subprocess.TimeoutExpired:
        raise RuntimeError("curl download timed out")

    # The status code is the text after the last marker in curl's stdout.
    stdout_text = result.stdout.decode("utf-8", errors="replace")
    _, found, tail = stdout_text.rpartition(marker)
    tail = tail.strip()
    status = int(tail) if found and tail.isdigit() else 0

    stderr_text = result.stderr.decode("utf-8", errors="replace")
    if stderr_text.strip():
        _slog(f" [dl] curl stderr: {stderr_text[:400]}")

    if status == 200 and out_path.exists():
        written = out_path.stat().st_size
        if written > 0:
            return written
    raise RuntimeError(f"curl HTTP {status}")
144
+
145
+
146
+ # ── direct MP4/webm downloader ────────────────────────────────
147
+
148
def _direct_via_curl(url: str, out_path: Path, referer: str,
                     progress: Optional[ProgressCB]) -> Path:
    """Stream *url* to *out_path* with system curl, polling file size for progress."""
    curl_bin = shutil.which("curl") or "curl"
    cmd = [
        curl_bin, "-sS", "-L",
        # Without --fail curl exits 0 on HTTP 4xx/5xx and the error page would
        # be saved and reported as a successful download.
        "--fail",
        "--max-time", "7200",
        "-A", UA,
        "-H", f"Referer: {referer}",
        "-H", f"Origin: {SITE_ORIGIN}",
        "-H", "Accept: */*",
        "-H", "Accept-Encoding: identity",
        "-H", "Sec-Fetch-Dest: video",
        "-H", "Sec-Fetch-Mode: no-cors",
        "-H", "Sec-Fetch-Site: cross-site",
        "-o", str(out_path),
        url,
    ]
    proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
    prev_sz = 0
    prev_t = time.time()
    err_raw = b""
    try:
        while proc.poll() is None:
            time.sleep(1.0)
            sz = out_path.stat().st_size if out_path.exists() else 0
            now = time.time()
            dt = now - prev_t
            speed = (sz - prev_sz) / dt if dt > 0 else 0
            prev_sz, prev_t = sz, now
            if progress:
                # Total size is unknown on this backend, so 0 is reported.
                progress(sz, 0, speed)
            _slog(f" [dl] {sz//1024//1024} MB {speed/1024:.0f} KB/s")
    except BaseException:
        # Ctrl-C or a failing progress callback: stop curl instead of waiting
        # for the whole transfer to finish.
        proc.terminate()
        raise
    finally:
        # communicate() waits for exit, drains stderr and closes the pipe.
        _, err_raw = proc.communicate()

    if proc.returncode != 0:
        err = (err_raw or b"").decode("utf-8", errors="replace")
        raise RuntimeError(f"curl failed (rc={proc.returncode}): {err[:300]}")

    size = out_path.stat().st_size if out_path.exists() else 0
    if size == 0:
        raise RuntimeError("curl wrote 0 bytes")
    _slog(f" [dl] done: {size/1_048_576:.1f} MB")
    return out_path


def _direct_via_urllib3(url: str, out_path: Path, referer: str,
                        progress: Optional[ProgressCB]) -> Path:
    """Stream *url* to *out_path* in 256 KiB chunks through the shared urllib3 pool."""
    CHUNK = 256 * 1024
    headers = {
        "User-Agent": UA,
        "Referer": referer,
        "Origin": SITE_ORIGIN,
        "Accept": "*/*",
        "Accept-Encoding": "identity",
    }
    resp = _get_pool().request(
        "GET", url, headers=headers,
        timeout=urllib3.Timeout(connect=15, read=300),
        preload_content=False,
    )
    if resp.status != 200:
        resp.drain_conn()
        raise RuntimeError(f"HTTP {resp.status}")

    try:
        total = int(resp.headers.get("Content-Length") or 0)
    except ValueError:
        total = 0  # malformed header: treat the size as unknown

    downloaded = 0
    start = time.time()
    try:
        with open(out_path, "wb") as f:
            for chunk in resp.stream(CHUNK):
                if not chunk:
                    continue
                f.write(chunk)
                downloaded += len(chunk)
                elapsed = max(time.time() - start, 0.001)
                if progress:
                    progress(downloaded, total, downloaded / elapsed)
    except BaseException:
        # Close rather than release: the body is only partly read.
        resp.close()
        raise
    resp.release_conn()

    if total and downloaded < total:
        # The server closed the stream early; do not report a truncated file as done.
        raise RuntimeError(f"incomplete download: {downloaded} of {total} bytes")

    _slog(f" [dl] done: {downloaded/1_048_576:.1f} MB")
    return out_path


def download_direct(
    url: str,
    out_path: Path,
    referer: str = SITE_REFERER,
    threads: int = 8,
    progress: Optional[ProgressCB] = None,
) -> Path:
    """
    Download a direct MP4/webm file stream-to-disk.

    Uses the system curl binary when the Termux heuristic matches and curl is
    on PATH; otherwise streams through urllib3 in chunks.

    Args:
        url: Direct media URL.
        out_path: Destination file; its parent directory is created if needed.
        referer: Value for the Referer header.
        threads: Accepted for interface compatibility; not used by either backend.
        progress: Optional callback invoked as ``progress(done, total, speed)``.
            The curl backend always reports ``total`` as 0.

    Returns:
        ``out_path`` once the file has been written.

    Raises:
        RuntimeError: On a non-200 response, a curl failure, an empty output
            file, or a body shorter than the announced Content-Length.
    """
    out_path = Path(out_path)
    _slog(f" [dl] IS_TERMUX={_IS_TERMUX} HAS_SYS_CURL={_HAS_SYS_CURL} HAS_FFMPEG={_HAS_FFMPEG}")
    _slog(f" [dl] direct download: {url}")
    out_path.parent.mkdir(parents=True, exist_ok=True)

    if _IS_TERMUX and _HAS_SYS_CURL:
        _slog(" [dl] backend: system curl (stream-to-disk)")
        return _direct_via_curl(url, out_path, referer, progress)

    _slog(" [dl] backend: urllib3 chunked")
    return _direct_via_urllib3(url, out_path, referer, progress)
252
+
253
+
254
+ # ── HLS segment downloader (same as java-extention) ───────────
255
+
256
def _fetch_text(url: str, referer: str = SITE_REFERER) -> str:
    """Return the body of *url* as text, raising RuntimeError unless the status is 200.

    Goes through system curl when the Termux heuristic matches and curl is
    available; otherwise uses the shared urllib3 pool and decodes the body as
    UTF-8 with replacement of invalid bytes.
    """
    if _IS_TERMUX and _HAS_SYS_CURL:
        status, body = _curl_get_text(url, referer=referer)
        if status == 200:
            return body
        raise RuntimeError(f"HTTP {status} fetching {url}")

    request_headers = {
        "User-Agent": UA,
        "Referer": referer,
        "Accept-Encoding": "identity",
    }
    response = _get_pool().request(
        "GET", url,
        headers=request_headers,
        timeout=urllib3.Timeout(connect=15, read=60),
    )
    if response.status == 200:
        return response.data.decode("utf-8", errors="replace")
    raise RuntimeError(f"HTTP {response.status} fetching {url}")
269
+
270
+
271
def _validate_m3u8(text: str, url: str) -> bool:
    """Return True when ``#EXTM3U`` occurs within the first 500 characters of *text*.

    On failure the problem and the first 300 characters of *text* are logged
    and False is returned; *url* is used only in the log message.
    """
    if "#EXTM3U" in text[:500]:
        return True
    _slog(f" [dl] ERROR: m3u8 at {url} did not return a valid playlist")
    _slog(f" [dl] first 300 chars: {text[:300]!r}")
    return False
277
+
278
+
279
def _parse_m3u8_variants(text: str, base_url: str) -> list[dict]:
    """Extract the variant streams listed in an HLS master playlist.

    Args:
        text: Playlist content.
        base_url: URL the playlist was fetched from; relative URIs are
            resolved against it.

    Returns:
        One dict per ``#EXT-X-STREAM-INF`` tag that is followed by a URI line,
        with keys ``url`` (absolute), ``bandwidth`` (int, 0 if absent) and
        ``resolution`` (``"WxH"`` or ``""``).
    """
    variants: list[dict] = []
    lines = [ln.strip() for ln in text.splitlines()]
    for i, line in enumerate(lines):
        if not line.startswith("#EXT-X-STREAM-INF"):
            continue

        # The lookbehind stops AVERAGE-BANDWIDTH=... from being read as BANDWIDTH.
        bw_match = re.search(r"(?<![A-Z-])BANDWIDTH=(\d+)", line)
        res_match = re.search(r"RESOLUTION=(\d+x\d+)", line)

        # The URI is the next non-blank line; tolerate stray empty lines so a
        # blank line is never resolved to the master playlist URL itself.
        j = i + 1
        while j < len(lines) and not lines[j]:
            j += 1
        if j >= len(lines) or lines[j].startswith("#"):
            continue

        variants.append({
            "url": urljoin(base_url, lines[j]),
            "bandwidth": int(bw_match.group(1)) if bw_match else 0,
            "resolution": res_match.group(1) if res_match else "",
        })
    return variants
296
+
297
+
298
def _parse_segment_playlist(text: str, base_url: str) -> list[str]:
    """Return the absolute URL of every entry in a media playlist.

    An entry is any line that, once stripped, is non-empty and does not start
    with ``#``; each one is resolved against *base_url*. Order is preserved.
    """
    stripped = (raw.strip() for raw in text.splitlines())
    return [
        urljoin(base_url, entry)
        for entry in stripped
        if entry and not entry.startswith("#")
    ]
306
+
307
+
308
def _curl_download_segment(url: str, out_path: Path, timeout: float = 120) -> int:
    """Download one HLS segment to *out_path* with system curl; return its size in bytes.

    Raises:
        RuntimeError: If the reported HTTP status is not 200 or the output
            file is missing or empty. curl's stderr, if any, is logged first.
    """
    marker = "\n__STATUS__:"
    extra_headers = (
        f"Referer: {SITE_REFERER}",
        f"Origin: {SITE_ORIGIN}",
        "Accept: */*",
        "Accept-Encoding: identity",
        "Sec-Fetch-Dest: empty",
        "Sec-Fetch-Mode: cors",
        "Sec-Fetch-Site: cross-site",
    )
    cmd = [
        shutil.which("curl") or "curl", "-sS", "-L",
        "--max-time", str(int(timeout)),
        "-A", UA,
    ]
    for header in extra_headers:
        cmd += ["-H", header]
    cmd += ["-o", str(out_path), "--write-out", marker + "%{http_code}", url]

    result = subprocess.run(cmd, capture_output=True, timeout=timeout + 5)

    # The status code is the text after the last marker in curl's stdout.
    stdout_text = result.stdout.decode("utf-8", errors="replace")
    _, found, tail = stdout_text.rpartition(marker)
    tail = tail.strip()
    status = int(tail) if found and tail.isdigit() else 0

    if status == 200 and out_path.exists():
        written = out_path.stat().st_size
        if written > 0:
            return written

    stderr_text = result.stderr.decode("utf-8", errors="replace")
    if stderr_text.strip():
        _slog(f" [dl] curl seg stderr: {stderr_text[:200]}")
    raise RuntimeError(f"curl seg HTTP {status}")
337
+
338
+
339
def _download_segment(url: str, idx: int, total: int, out_path: Path) -> int:
    """Download one HLS segment to *out_path*, retrying with exponential backoff.

    Data is written to a sibling ``<name>.part`` file and renamed onto
    *out_path* only after a complete transfer, so an interrupted run never
    leaves a truncated file that looks like a finished segment.

    Args:
        url: Segment URL.
        idx: Zero-based segment index (used in log messages).
        total: Total number of segments (used in log messages).
        out_path: Final destination of the segment.

    Returns:
        Number of bytes written.

    Raises:
        Exception: The error from the last attempt, after 5 failed tries.
    """
    MAX_TRIES = 5
    CHUNK = 128 * 1024
    delay = 1
    part_path = out_path.with_name(out_path.name + ".part")

    for attempt in range(1, MAX_TRIES + 1):
        try:
            if _IS_TERMUX and _HAS_SYS_CURL:
                sz = _curl_download_segment(url, part_path)
            else:
                r = _get_pool().request("GET", url, headers={
                    "User-Agent": UA,
                    "Referer": SITE_REFERER,
                    "Origin": SITE_ORIGIN,
                    "Accept": "*/*",
                    "Accept-Encoding": "identity",
                }, timeout=urllib3.Timeout(connect=10, read=120), preload_content=False)
                if r.status != 200:
                    r.drain_conn()
                    raise RuntimeError(f"HTTP {r.status}")
                sz = 0
                try:
                    with open(part_path, "wb") as f:
                        for chunk in r.stream(CHUNK):
                            if chunk:
                                f.write(chunk)
                                sz += len(chunk)
                except BaseException:
                    # Close rather than release: the body is only partly read.
                    r.close()
                    raise
                r.release_conn()
                if sz == 0:
                    # Match the curl backend, which rejects empty output.
                    raise RuntimeError("empty response body")

            # Publish the finished file under its final name in one step.
            part_path.replace(out_path)
            return sz
        except Exception as e:
            try:
                part_path.unlink(missing_ok=True)
            except OSError:
                pass
            _slog(f" [dl] seg {idx+1}/{total} HTTP error attempt {attempt}/{MAX_TRIES}: {e}")
            if attempt >= MAX_TRIES:
                raise
            _slog(f" [dl] retrying in {delay}s…")
            time.sleep(delay)
            delay = min(delay * 2, 30)
373
+
374
+
375
def _resume_dir(key: str) -> Path:
    """Return (creating it if needed) the temp directory derived from *key*.

    The directory is ``<system temp dir>/fpv_<first 14 hex chars of md5(key)>``,
    so the same key always maps to the same path. MD5 is used only to derive a
    name, not for security.
    """
    digest = hashlib.md5(key.encode(), usedforsecurity=False).hexdigest()
    target = Path(tempfile.gettempdir(), f"fpv_{digest[:14]}")
    target.mkdir(parents=True, exist_ok=True)
    return target
380
+
381
+
382
def _run_hls_download(
    urls: list[str],
    threads: int,
    progress: Optional[ProgressCB],
    resume_key: str = "",
) -> tuple[Path, list[str]]:
    """Download every segment in *urls* into a temp directory using a thread pool.

    Segment ``i`` is stored as ``<tmp_dir>/<i>.ts``. Segments already present
    with a non-zero size are kept and not downloaded again.

    Args:
        urls: Segment URLs in playback order.
        threads: Worker count; values below 1 are raised to 1.
        progress: Optional callback invoked as ``progress(done, total, 0)``
            where ``done`` counts segments and never exceeds ``total``.
        resume_key: When non-empty, selects a deterministic directory via
            ``_resume_dir`` so a later run can resume; otherwise a fresh
            temporary directory is created.

    Returns:
        ``(tmp_dir, failed)`` where ``failed`` holds one message per segment
        that could not be downloaded.
    """
    total = len(urls)
    tmp_dir = _resume_dir(resume_key) if resume_key else Path(tempfile.mkdtemp(prefix="fpv_"))
    _slog(f" [dl] tmp dir: {tmp_dir}")

    def _seg_path(i: int) -> Path:
        return tmp_dir / f"{i}.ts"

    def _is_done(i: int) -> bool:
        p = _seg_path(i)
        return p.exists() and p.stat().st_size > 0

    # Only segments that are missing or empty are scheduled; counting starts
    # at the number already on disk, so the final count equals `total`.
    pending = [i for i in range(total) if not _is_done(i)]
    already = total - len(pending)
    if already:
        _slog(f" [dl] resume: {already}/{total} segments already on disk — skipping")

    completed = already
    failed: list[str] = []
    if progress and already:
        progress(completed, total, 0)

    def _task(idx: int) -> int:
        sz = _download_segment(urls[idx], idx, total, _seg_path(idx))
        _slog(f" [dl] seg {idx+1}/{total} {sz//1024}KB")
        return sz

    max_workers = None if threads is None else max(1, threads)
    with ThreadPoolExecutor(max_workers=max_workers) as ex:
        futs = {ex.submit(_task, i): i for i in pending}
        # Results are consumed in this thread only, so the counter needs no lock.
        for fut in as_completed(futs):
            idx = futs[fut]
            try:
                fut.result()
            except Exception as e:
                _slog(f" [dl] seg {idx+1}/{total} FAILED: {e}")
                failed.append(f"seg {idx+1}: {e}")
                # Zero-byte placeholder: treated as "not downloaded" everywhere.
                _seg_path(idx).write_bytes(b"")
            completed += 1
            if progress:
                progress(completed, total, 0)

    return tmp_dir, failed
433
+
434
+
435
def _ffmpeg_concat(tmp_dir: Path, total: int, out_path: Path) -> None:
    """Join the downloaded segments in *tmp_dir* into *out_path*.

    Uses ffmpeg's concat demuxer with stream copy when ffmpeg is available.
    Falls back to plain byte concatenation when ffmpeg is missing, fails,
    cannot be started, or does not finish within 600 seconds. The segment
    files and the temporary list file are removed afterwards.

    Args:
        tmp_dir: Directory holding ``0.ts`` .. ``<total-1>.ts``.
        total: Number of segments expected; missing or empty ones are skipped.
        out_path: Destination file; its parent directory is created if needed.
    """
    if not _HAS_FFMPEG:
        _slog(" [dl] ffmpeg not found — raw byte concat")
        _raw_concat(tmp_dir, total, out_path)
        return

    filelist = tmp_dir / "filelist.txt"
    entries = []
    for i in range(total):
        p = tmp_dir / f"{i}.ts"
        if p.exists() and p.stat().st_size > 0:
            # Inside a single-quoted concat entry a literal ' is written as '\''.
            quoted = str(p).replace("'", "'\\''")
            entries.append(f"file '{quoted}'\n")
    filelist.write_text("".join(entries), encoding="utf-8")
    out_path.parent.mkdir(parents=True, exist_ok=True)

    cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(filelist), "-c", "copy", str(out_path)]
    try:
        result = subprocess.run(cmd, capture_output=True, timeout=600)
    except (subprocess.TimeoutExpired, OSError) as exc:
        # Treat a hung or unlaunchable ffmpeg like any other ffmpeg failure.
        _slog(f" [dl] ffmpeg error: {exc}")
        ffmpeg_ok = False
    else:
        ffmpeg_ok = result.returncode == 0
        if not ffmpeg_ok:
            _slog(f" [dl] ffmpeg error: {result.stderr.decode('utf-8', errors='replace')[-400:]}")

    if not ffmpeg_ok:
        _raw_concat(tmp_dir, total, out_path)

    # Best-effort cleanup of the list file and all segment files.
    leftovers = [filelist] + [tmp_dir / f"{i}.ts" for i in range(total)]
    for leftover in leftovers:
        try:
            leftover.unlink(missing_ok=True)
        except OSError:
            pass
464
+
465
+
466
def _raw_concat(tmp_dir: Path, total: int, out_path: Path) -> None:
    """Concatenate segment files byte-for-byte into *out_path*.

    Segments ``0.ts`` .. ``<total-1>.ts`` in *tmp_dir* are appended in index
    order; missing or empty ones contribute nothing. Each segment file,
    including zero-byte placeholders, is deleted once it has been processed
    so the directory can be removed afterwards.

    Args:
        tmp_dir: Directory holding the segment files.
        total: Number of segment indices to consider.
        out_path: Destination file (overwritten); its parent directory is
            created if needed.
    """
    CHUNK = 256 * 1024
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, "wb") as fout:
        for i in range(total):
            seg = tmp_dir / f"{i}.ts"
            if seg.exists() and seg.stat().st_size > 0:
                with open(seg, "rb") as fin:
                    # Streams in CHUNK-sized reads; nothing is held in memory.
                    shutil.copyfileobj(fin, fout, CHUNK)
            # Best-effort removal; a copy error above propagates before this
            # point, leaving the segment in place for a retry.
            try:
                seg.unlink(missing_ok=True)
            except OSError:
                pass
483
+
484
+
485
def download_hls(
    m3u8_url: str,
    out_path: Path,
    threads: int = 8,
    progress: Optional[ProgressCB] = None,
) -> Path:
    """Download an HLS stream to *out_path*.

    Fetches the playlist, switches to the highest-bandwidth variant when the
    playlist is a master playlist, downloads all segments (resumable, keyed
    by the media playlist URL) and concatenates them.

    Args:
        m3u8_url: URL of a master or media playlist.
        out_path: Destination file.
        threads: Number of parallel segment downloads.
        progress: Optional callback invoked as ``progress(done, total, 0)``
            with segment counts.

    Returns:
        ``out_path``. Individual segment failures are logged as a warning and
        the output is still produced from the segments that succeeded.

    Raises:
        RuntimeError: If a fetched playlist is not a valid m3u8, contains no
            segments, or every segment failed to download.
    """
    out_path = Path(out_path)
    referer = SITE_REFERER
    _slog(" [dl] resolving HLS playlist…")
    text = _fetch_text(m3u8_url, referer)

    if not _validate_m3u8(text, m3u8_url):
        raise RuntimeError(f"m3u8 returned non-playlist content: {text[:200]!r}")

    if "#EXT-X-STREAM-INF" in text:
        variants = _parse_m3u8_variants(text, m3u8_url)
        if variants:
            # max() returns the first of equal maxima, like the stable sort it replaces.
            best = max(variants, key=lambda v: v["bandwidth"])
            m3u8_url = best["url"]
            text = _fetch_text(m3u8_url, referer)
            # An error page served with status 200 would otherwise be parsed
            # line by line as if each line were a segment URL.
            if not _validate_m3u8(text, m3u8_url):
                raise RuntimeError(f"m3u8 returned non-playlist content: {text[:200]!r}")

    segments = _parse_segment_playlist(text, m3u8_url)
    if not segments:
        raise RuntimeError("No segments in playlist")

    _slog(f" [dl] HLS: {len(segments)} segments threads={threads} ffmpeg={_HAS_FFMPEG}")

    tmp_dir, failed = _run_hls_download(segments, threads, progress, resume_key=m3u8_url)

    if len(failed) >= len(segments):
        # Nothing usable was downloaded; do not write and report an empty file.
        raise RuntimeError(f"all {len(segments)} segment(s) failed to download")

    _slog(" [dl] concatenating…")
    _ffmpeg_concat(tmp_dir, len(segments), out_path)

    try:
        tmp_dir.rmdir()
    except OSError:
        # Directory not empty or already gone; leaving it behind is harmless.
        pass

    size = out_path.stat().st_size
    _slog(f" [dl] saved: {out_path} ({size/1_048_576:.1f} MB)")
    if failed:
        _slog(f" [dl] WARNING: {len(failed)} segment(s) failed")
    return out_path