PyPI - java2-extention - Versions diffs - 1.0.0__py3-none-any.whl - Mend

java2-extention 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

java2_extention/__init__.py +1 -0
java2_extention/__main__.py +325 -0
java2_extention/cache.py +52 -0
java2_extention/config.py +75 -0
java2_extention/downloader.py +525 -0
java2_extention/http_client.py +232 -0
java2_extention/scraper.py +345 -0
java2_extention-1.0.0.dist-info/METADATA +53 -0
java2_extention-1.0.0.dist-info/RECORD +12 -0
java2_extention-1.0.0.dist-info/WHEEL +5 -0
java2_extention-1.0.0.dist-info/entry_points.txt +2 -0
java2_extention-1.0.0.dist-info/top_level.txt +1 -0

java2_extention/downloader.py ADDED Viewed

@@ -0,0 +1,525 @@
+"""
+downloader.py — freepornvideos.xxx direct MP4 + HLS downloader
+- MP4/webm: system curl stream-to-disk on Termux, urllib3 elsewhere
+- HLS (m3u8): segment-by-segment download + ffmpeg concat
+- Resume: deterministic temp dir, skips already-downloaded segments
+- 2 GB RAM safe: nothing buffered in memory
+"""
+from __future__ import annotations
+import hashlib
+import os
+import platform
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+import threading
+import time
+import urllib3
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+from typing import Optional, Callable
+from urllib.parse import urljoin
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+ProgressCB = Callable[[int, int, float], None]
+_IS_TERMUX = (
+    platform.machine() in ("aarch64", "armv7l")
+    or platform.system() == "Android"
+    or "com.termux" in sys.executable
+)
+_HAS_FFMPEG   = bool(shutil.which("ffmpeg"))
+_HAS_SYS_CURL = bool(shutil.which("curl"))
+_POOL_SIZE    = 4 if _IS_TERMUX else 16
+UA = (
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+    "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
+)
+SITE_ORIGIN  = "https://www.freepornvideos.xxx"
+SITE_REFERER = "https://www.freepornvideos.xxx/"
+_LOG_FILE: Optional[Path] = None
+def _slog(msg: str) -> None:
+    print(msg, flush=True)
+    if _LOG_FILE:
+        try:
+            with open(_LOG_FILE, "a", encoding="utf-8") as f:
+                f.write(msg + "\n")
+        except Exception:
+            pass
+def set_verbose(_: bool = True) -> None:
+    pass
+# ── urllib3 pool ──────────────────────────────────────────────
+_pool_lock = threading.Lock()
+_pool: Optional[urllib3.PoolManager] = None
+def _get_pool() -> urllib3.PoolManager:
+    global _pool
+    with _pool_lock:
+        if _pool is None:
+            _pool = urllib3.PoolManager(
+                num_pools=_POOL_SIZE,
+                maxsize=_POOL_SIZE,
+                headers={"User-Agent": UA},
+                timeout=urllib3.Timeout(connect=15, read=120),
+            )
+        return _pool
+# ── curl helpers ──────────────────────────────────────────────
+def _curl_get_text(url: str, referer: str = SITE_REFERER,
+                   timeout: float = 30.0) -> tuple[int, str]:
+    curl_bin = shutil.which("curl") or "curl"
+    cmd = [
+        curl_bin, "-sS", "-L",
+        "--max-time", str(int(timeout)),
+        "--compressed",
+        "-A", UA,
+        "-H", f"Referer: {referer}",
+        "-H", f"Origin: {SITE_ORIGIN}",
+        "-H", "Accept: */*",
+        "--write-out", "\n__STATUS__:%{http_code}",
+        url,
+    ]
+    result = subprocess.run(cmd, capture_output=True, timeout=timeout + 5)
+    raw = result.stdout.decode("utf-8", errors="replace")
+    if "\n__STATUS__:" in raw:
+        body, st = raw.rsplit("\n__STATUS__:", 1)
+        status = int(st.strip()) if st.strip().isdigit() else 0
+    else:
+        body, status = raw, 0
+    return status, body
+def _curl_download_to_file(url: str, referer: str, out_path: Path,
+                            timeout: float = 3600) -> int:
+    """Stream a direct MP4/webm file to disk via system curl. Returns bytes written."""
+    curl_bin = shutil.which("curl") or "curl"
+    cmd = [
+        curl_bin, "-sS", "-L",
+        "--max-time", str(int(timeout)),
+        "-A", UA,
+        "-H", f"Referer: {referer}",
+        "-H", f"Origin: {SITE_ORIGIN}",
+        "-H", "Accept: */*",
+        "-H", "Accept-Encoding: identity",
+        "-H", "Sec-Fetch-Dest: video",
+        "-H", "Sec-Fetch-Mode: no-cors",
+        "-H", "Sec-Fetch-Site: cross-site",
+        "-o", str(out_path),
+        "--write-out", "\n__STATUS__:%{http_code}",
+        url,
+    ]
+    try:
+        result = subprocess.run(cmd, capture_output=True, timeout=timeout + 10)
+        raw    = result.stdout.decode("utf-8", errors="replace")
+        status = 0
+        if "\n__STATUS__:" in raw:
+            _, st = raw.rsplit("\n__STATUS__:", 1)
+            status = int(st.strip()) if st.strip().isdigit() else 0
+        err = result.stderr.decode("utf-8", errors="replace")
+        if err.strip():
+            _slog(f"  [dl]  curl stderr: {err[:400]}")
+        if status == 200 and out_path.exists() and out_path.stat().st_size > 0:
+            return out_path.stat().st_size
+        raise RuntimeError(f"curl HTTP {status}")
+    except subprocess.TimeoutExpired:
+        raise RuntimeError("curl download timed out")
+# ── direct MP4/webm downloader ────────────────────────────────
+def download_direct(
+    url:      str,
+    out_path: Path,
+    referer:  str = SITE_REFERER,
+    threads:  int = 8,
+    progress: Optional[ProgressCB] = None,
+) -> Path:
+    """
+    Download a direct MP4/webm file stream-to-disk.
+    On Termux uses system curl. Elsewhere uses urllib3 chunked download.
+    """
+    _slog(f"  [dl]  IS_TERMUX={_IS_TERMUX}  HAS_SYS_CURL={_HAS_SYS_CURL}  HAS_FFMPEG={_HAS_FFMPEG}")
+    _slog(f"  [dl]  direct download: {url}")
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    if _IS_TERMUX and _HAS_SYS_CURL:
+        _slog("  [dl]  backend: system curl (stream-to-disk)")
+        # Run curl in a subprocess, poll file size for progress
+        curl_bin = shutil.which("curl") or "curl"
+        cmd = [
+            curl_bin, "-sS", "-L",
+            "--max-time", "7200",
+            "-A", UA,
+            "-H", f"Referer: {referer}",
+            "-H", f"Origin: {SITE_ORIGIN}",
+            "-H", "Accept: */*",
+            "-H", "Accept-Encoding: identity",
+            "-H", "Sec-Fetch-Dest: video",
+            "-H", "Sec-Fetch-Mode: no-cors",
+            "-H", "Sec-Fetch-Site: cross-site",
+            "-o", str(out_path),
+            url,
+        ]
+        proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
+        start = time.time()
+        prev_sz = 0
+        prev_t  = start
+        try:
+            while proc.poll() is None:
+                time.sleep(1.0)
+                sz = out_path.stat().st_size if out_path.exists() else 0
+                now  = time.time()
+                dt   = now - prev_t
+                if dt > 0:
+                    speed = (sz - prev_sz) / dt
+                else:
+                    speed = 0
+                prev_sz, prev_t = sz, now
+                if progress:
+                    progress(sz, 0, speed)
+                _slog(f"  [dl]  {sz//1024//1024} MB  {speed/1024:.0f} KB/s")
+        except KeyboardInterrupt:
+            proc.terminate()
+            raise
+        finally:
+            proc.wait()
+        if proc.returncode != 0:
+            err = proc.stderr.read().decode("utf-8", errors="replace")
+            raise RuntimeError(f"curl failed (rc={proc.returncode}): {err[:300]}")
+        size = out_path.stat().st_size if out_path.exists() else 0
+        if size == 0:
+            raise RuntimeError("curl wrote 0 bytes")
+        _slog(f"  [dl]  done: {size/1_048_576:.1f} MB")
+        return out_path
+    # ── urllib3 chunked fallback ───────────────────────────────
+    _slog("  [dl]  backend: urllib3 chunked")
+    pool    = _get_pool()
+    headers = {
+        "User-Agent":      UA,
+        "Referer":         referer,
+        "Origin":          SITE_ORIGIN,
+        "Accept":          "*/*",
+        "Accept-Encoding": "identity",
+    }
+    resp = pool.request(
+        "GET", url, headers=headers,
+        timeout=urllib3.Timeout(connect=15, read=300),
+        preload_content=False,
+    )
+    if resp.status != 200:
+        resp.drain_conn()
+        raise RuntimeError(f"HTTP {resp.status}")
+    total = int(resp.headers.get("Content-Length", 0)) or 0
+    CHUNK = 256 * 1024
+    downloaded = 0
+    start = time.time()
+    with open(out_path, "wb") as f:
+        for chunk in resp.stream(CHUNK):
+            if not chunk:
+                continue
+            f.write(chunk)
+            downloaded += len(chunk)
+            elapsed = max(time.time() - start, 0.001)
+            speed   = downloaded / elapsed
+            if progress:
+                progress(downloaded, total, speed)
+    _slog(f"  [dl]  done: {downloaded/1_048_576:.1f} MB")
+    return out_path
+# ── HLS segment downloader (same as java-extention) ───────────
+def _fetch_text(url: str, referer: str = SITE_REFERER) -> str:
+    if _IS_TERMUX and _HAS_SYS_CURL:
+        status, text = _curl_get_text(url, referer=referer)
+        if status != 200:
+            raise RuntimeError(f"HTTP {status} fetching {url}")
+        return text
+    pool = _get_pool()
+    r    = pool.request("GET", url, headers={
+        "User-Agent": UA, "Referer": referer, "Accept-Encoding": "identity",
+    }, timeout=urllib3.Timeout(connect=15, read=60))
+    if r.status != 200:
+        raise RuntimeError(f"HTTP {r.status} fetching {url}")
+    return r.data.decode("utf-8", errors="replace")
+def _validate_m3u8(text: str, url: str) -> bool:
+    if "#EXTM3U" not in text[:500]:
+        _slog(f"  [dl]  ERROR: m3u8 at {url} did not return a valid playlist")
+        _slog(f"  [dl]  first 300 chars: {text[:300]!r}")
+        return False
+    return True
+def _parse_m3u8_variants(text: str, base_url: str) -> list[dict]:
+    variants = []
+    lines    = text.splitlines()
+    for i, line in enumerate(lines):
+        if line.startswith("#EXT-X-STREAM-INF"):
+            bw = 0
+            m  = re.search(r"BANDWIDTH=(\d+)", line)
+            if m:
+                bw = int(m.group(1))
+            res = ""
+            m2  = re.search(r"RESOLUTION=(\d+x\d+)", line)
+            if m2:
+                res = m2.group(1)
+            if i + 1 < len(lines) and not lines[i + 1].startswith("#"):
+                seg_url = urljoin(base_url, lines[i + 1].strip())
+                variants.append({"url": seg_url, "bandwidth": bw, "resolution": res})
+    return variants
+def _parse_segment_playlist(text: str, base_url: str) -> list[str]:
+    urls = []
+    for line in text.splitlines():
+        line = line.strip()
+        if not line or line.startswith("#"):
+            continue
+        urls.append(urljoin(base_url, line))
+    return urls
+def _curl_download_segment(url: str, out_path: Path, timeout: float = 120) -> int:
+    curl_bin = shutil.which("curl") or "curl"
+    cmd = [
+        curl_bin, "-sS", "-L",
+        "--max-time", str(int(timeout)),
+        "-A", UA,
+        "-H", f"Referer: {SITE_REFERER}",
+        "-H", f"Origin: {SITE_ORIGIN}",
+        "-H", "Accept: */*",
+        "-H", "Accept-Encoding: identity",
+        "-H", "Sec-Fetch-Dest: empty",
+        "-H", "Sec-Fetch-Mode: cors",
+        "-H", "Sec-Fetch-Site: cross-site",
+        "-o", str(out_path),
+        "--write-out", "\n__STATUS__:%{http_code}",
+        url,
+    ]
+    result  = subprocess.run(cmd, capture_output=True, timeout=timeout + 5)
+    raw     = result.stdout.decode("utf-8", errors="replace")
+    status  = 0
+    if "\n__STATUS__:" in raw:
+        _, st  = raw.rsplit("\n__STATUS__:", 1)
+        status = int(st.strip()) if st.strip().isdigit() else 0
+    if status == 200 and out_path.exists() and out_path.stat().st_size > 0:
+        return out_path.stat().st_size
+    err = result.stderr.decode("utf-8", errors="replace")
+    if err.strip():
+        _slog(f"  [dl]  curl seg stderr: {err[:200]}")
+    raise RuntimeError(f"curl seg HTTP {status}")
+def _download_segment(url: str, idx: int, total: int, out_path: Path) -> int:
+    MAX_TRIES = 5
+    delay     = 1
+    for attempt in range(1, MAX_TRIES + 1):
+        try:
+            if _IS_TERMUX and _HAS_SYS_CURL:
+                return _curl_download_segment(url, out_path)
+            pool = _get_pool()
+            r    = pool.request("GET", url, headers={
+                "User-Agent":      UA,
+                "Referer":         SITE_REFERER,
+                "Origin":          SITE_ORIGIN,
+                "Accept":          "*/*",
+                "Accept-Encoding": "identity",
+            }, timeout=urllib3.Timeout(connect=10, read=120), preload_content=False)
+            if r.status != 200:
+                r.drain_conn()
+                raise RuntimeError(f"HTTP {r.status}")
+            CHUNK = 128 * 1024
+            sz    = 0
+            with open(out_path, "wb") as f:
+                for chunk in r.stream(CHUNK):
+                    if chunk:
+                        f.write(chunk)
+                        sz += len(chunk)
+            return sz
+        except Exception as e:
+            _slog(f"  [dl]  seg {idx+1}/{total}  HTTP error attempt {attempt}/{MAX_TRIES}: {e}")
+            if attempt < MAX_TRIES:
+                _slog(f"  [dl]  retrying in {delay}s…")
+                time.sleep(delay)
+                delay = min(delay * 2, 30)
+            else:
+                raise
+def _resume_dir(key: str) -> Path:
+    h = hashlib.md5(key.encode(), usedforsecurity=False).hexdigest()[:14]
+    d = Path(tempfile.gettempdir()) / f"fpv_{h}"
+    d.mkdir(parents=True, exist_ok=True)
+    return d
+def _run_hls_download(
+    urls:       list[str],
+    threads:    int,
+    progress:   Optional[ProgressCB],
+    resume_key: str = "",
+) -> tuple[Path, list[str]]:
+    total   = len(urls)
+    tmp_dir = _resume_dir(resume_key) if resume_key else Path(tempfile.mkdtemp(prefix="fpv_"))
+    _slog(f"  [dl]  tmp dir: {tmp_dir}")
+    already = sum(
+        1 for i in range(total)
+        if (tmp_dir / f"{i}.ts").exists() and (tmp_dir / f"{i}.ts").stat().st_size > 0
+    )
+    if already:
+        _slog(f"  [dl]  resume: {already}/{total} segments already on disk — skipping")
+    pending = [
+        i for i in range(total)
+        if not ((tmp_dir / f"{i}.ts").exists() and (tmp_dir / f"{i}.ts").stat().st_size > 0)
+    ]
+    completed = already
+    failed: list[str] = []
+    lock    = threading.Lock()
+    def _task(idx_url: tuple[int, str]) -> tuple[int, int]:
+        idx, url = idx_url
+        out = tmp_dir / f"{idx}.ts"
+        if out.exists() and out.stat().st_size > 0:
+            return idx, out.stat().st_size
+        sz = _download_segment(url, idx, total, out)
+        _slog(f"  [dl]  seg {idx+1}/{total}  {sz//1024}KB")
+        return idx, sz
+    with ThreadPoolExecutor(max_workers=threads) as ex:
+        futs = {ex.submit(_task, (i, urls[i])): i for i in range(total)}
+        for fut in as_completed(futs):
+            try:
+                idx, sz = fut.result()
+            except Exception as e:
+                idx = futs[fut]
+                _slog(f"  [dl]  seg {idx+1}/{total} FAILED: {e}")
+                failed.append(f"seg {idx+1}: {e}")
+                (tmp_dir / f"{idx}.ts").write_bytes(b"")
+            with lock:
+                completed += 1
+            if progress:
+                progress(completed, total, 0)
+    return tmp_dir, failed
+def _ffmpeg_concat(tmp_dir: Path, total: int, out_path: Path) -> None:
+    if not _HAS_FFMPEG:
+        _slog("  [dl]  ffmpeg not found — raw byte concat")
+        _raw_concat(tmp_dir, total, out_path)
+        return
+    lines    = []
+    filelist = tmp_dir / "filelist.txt"
+    for i in range(total):
+        p = tmp_dir / f"{i}.ts"
+        if p.exists() and p.stat().st_size > 0:
+            lines.append(f"file '{p}'\n")
+    filelist.write_text("".join(lines), encoding="utf-8")
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    cmd    = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(filelist), "-c", "copy", str(out_path)]
+    result = subprocess.run(cmd, capture_output=True, timeout=600)
+    if result.returncode != 0:
+        _slog(f"  [dl]  ffmpeg error: {result.stderr.decode('utf-8', errors='replace')[-400:]}")
+        _raw_concat(tmp_dir, total, out_path)
+    try:
+        filelist.unlink(missing_ok=True)
+    except Exception:
+        pass
+    for i in range(total):
+        try:
+            (tmp_dir / f"{i}.ts").unlink(missing_ok=True)
+        except Exception:
+            pass
+def _raw_concat(tmp_dir: Path, total: int, out_path: Path) -> None:
+    CHUNK = 256 * 1024
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(out_path, "wb") as fout:
+        for i in range(total):
+            p = tmp_dir / f"{i}.ts"
+            if p.exists() and p.stat().st_size > 0:
+                with open(p, "rb") as fin:
+                    while True:
+                        chunk = fin.read(CHUNK)
+                        if not chunk:
+                            break
+                        fout.write(chunk)
+            try:
+                p.unlink(missing_ok=True)
+            except Exception:
+                pass
+def download_hls(
+    m3u8_url: str,
+    out_path: Path,
+    threads:  int = 8,
+    progress: Optional[ProgressCB] = None,
+) -> Path:
+    referer = SITE_REFERER
+    _slog("  [dl]  resolving HLS playlist…")
+    text = _fetch_text(m3u8_url, referer)
+    if not _validate_m3u8(text, m3u8_url):
+        raise RuntimeError(f"m3u8 returned non-playlist content: {text[:200]!r}")
+    if "#EXT-X-STREAM-INF" in text:
+        variants = _parse_m3u8_variants(text, m3u8_url)
+        if variants:
+            variants.sort(key=lambda v: v["bandwidth"], reverse=True)
+            m3u8_url = variants[0]["url"]
+            text     = _fetch_text(m3u8_url, referer)
+    segments = _parse_segment_playlist(text, m3u8_url)
+    if not segments:
+        raise RuntimeError("No segments in playlist")
+    _slog(f"  [dl]  HLS: {len(segments)} segments  threads={threads}  ffmpeg={_HAS_FFMPEG}")
+    tmp_dir, failed = _run_hls_download(segments, threads, progress, resume_key=m3u8_url)
+    _slog("  [dl]  concatenating…")
+    _ffmpeg_concat(tmp_dir, len(segments), out_path)
+    try:
+        tmp_dir.rmdir()
+    except Exception:
+        pass
+    size = out_path.stat().st_size
+    _slog(f"  [dl]  saved: {out_path}  ({size/1_048_576:.1f} MB)")
+    if failed:
+        _slog(f"  [dl]  WARNING: {len(failed)} segment(s) failed")
+    return out_path