npm - @gajae-code/coding-agent - Versions diffs - 0.7.1 → 0.7.3 - Mend

@gajae-code/coding-agent 0.7.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (135) hide show

package/CHANGELOG.md +57 -0
package/dist/types/cli/mcp-cli.d.ts +25 -0
package/dist/types/cli/notify-cli.d.ts +2 -0
package/dist/types/cli.d.ts +6 -0
package/dist/types/commands/mcp.d.ts +70 -0
package/dist/types/config/keybindings.d.ts +2 -2
package/dist/types/config/settings-schema.d.ts +39 -2
package/dist/types/deep-interview/plaintext-gate-guard.d.ts +11 -0
package/dist/types/extensibility/shared-events.d.ts +1 -0
package/dist/types/gjc-runtime/ralplan-runtime.d.ts +1 -1
package/dist/types/lsp/types.d.ts +2 -0
package/dist/types/modes/components/custom-editor.d.ts +1 -1
package/dist/types/modes/components/model-selector.d.ts +2 -0
package/dist/types/modes/components/status-line/git-utils.d.ts +6 -0
package/dist/types/modes/theme/defaults/index.d.ts +99 -0
package/dist/types/notifications/attachment-registry.d.ts +17 -0
package/dist/types/notifications/chat-adapters.d.ts +9 -0
package/dist/types/notifications/config.d.ts +9 -1
package/dist/types/notifications/engine.d.ts +59 -0
package/dist/types/notifications/managed-daemon.d.ts +48 -0
package/dist/types/notifications/operator-runtime.d.ts +52 -0
package/dist/types/notifications/telegram-daemon.d.ts +73 -16
package/dist/types/notifications/threaded-inbound.d.ts +19 -0
package/dist/types/notifications/threaded-render.d.ts +6 -1
package/dist/types/notifications/topic-registry.d.ts +2 -0
package/dist/types/session/agent-session.d.ts +2 -0
package/dist/types/tools/composer-bash-policy.d.ts +14 -0
package/dist/types/tools/fetch.d.ts +23 -0
package/dist/types/tools/index.d.ts +1 -0
package/dist/types/tools/telegram-send.d.ts +32 -0
package/dist/types/web/insane/bridge.d.ts +103 -0
package/dist/types/web/insane/url-guard.d.ts +25 -0
package/dist/types/web/scrapers/types.d.ts +5 -0
package/dist/types/web/scrapers/utils.d.ts +7 -1
package/dist/types/web/search/provider.d.ts +18 -1
package/dist/types/web/search/providers/insane.d.ts +53 -0
package/dist/types/web/search/providers/text-citations.d.ts +23 -0
package/dist/types/web/search/types.d.ts +12 -4
package/package.json +10 -8
package/scripts/verify-insane-vendor.ts +132 -0
package/src/cli/args.ts +1 -1
package/src/cli/fast-help.ts +1 -1
package/src/cli/mcp-cli.ts +272 -0
package/src/cli/notify-cli.ts +152 -5
package/src/cli.ts +6 -2
package/src/commands/mcp.ts +117 -0
package/src/commands/team.ts +1 -1
package/src/config/keybindings.ts +2 -2
package/src/config/settings-schema.ts +30 -1
package/src/deep-interview/plaintext-gate-guard.ts +94 -0
package/src/defaults/gjc/skills/deep-interview/SKILL.md +4 -3
package/src/defaults/gjc/skills/ralplan/SKILL.md +11 -4
package/src/defaults/gjc/skills/team/SKILL.md +3 -2
package/src/extensibility/extensions/runner.ts +1 -0
package/src/extensibility/shared-events.ts +1 -0
package/src/gjc-runtime/launch-tmux.ts +17 -3
package/src/gjc-runtime/ledger-event-renderer.ts +1 -0
package/src/gjc-runtime/ralplan-runtime.ts +2 -2
package/src/gjc-runtime/tmux-common.ts +3 -1
package/src/gjc-runtime/ultragoal-guard.ts +25 -8
package/src/gjc-runtime/workflow-manifest.generated.json +29 -0
package/src/gjc-runtime/workflow-manifest.ts +7 -2
package/src/hooks/skill-state.ts +57 -0
package/src/internal-urls/docs-index.generated.ts +14 -11
package/src/lsp/config.ts +16 -3
package/src/lsp/defaults.json +7 -0
package/src/lsp/types.ts +2 -0
package/src/modes/bridge/bridge-mode.ts +11 -0
package/src/modes/components/custom-editor.ts +2 -0
package/src/modes/components/footer.ts +2 -3
package/src/modes/components/model-selector.ts +12 -0
package/src/modes/components/status-line/git-utils.ts +25 -0
package/src/modes/components/status-line.ts +10 -11
package/src/modes/components/welcome.ts +2 -3
package/src/modes/controllers/event-controller.ts +15 -0
package/src/modes/controllers/selector-controller.ts +3 -0
package/src/modes/interactive-mode.ts +48 -3
package/src/modes/shared/agent-wire/scopes.ts +1 -1
package/src/modes/theme/defaults/gruvbox-dark.json +99 -0
package/src/modes/theme/defaults/index.ts +2 -0
package/src/modes/utils/context-usage.ts +2 -2
package/src/notifications/attachment-registry.ts +23 -0
package/src/notifications/chat-adapters.ts +147 -0
package/src/notifications/config.ts +23 -2
package/src/notifications/engine.ts +100 -0
package/src/notifications/index.ts +180 -38
package/src/notifications/managed-daemon.ts +163 -0
package/src/notifications/operator-runtime.ts +171 -0
package/src/notifications/telegram-daemon.ts +553 -236
package/src/notifications/threaded-inbound.ts +60 -4
package/src/notifications/threaded-render.ts +20 -2
package/src/notifications/topic-registry.ts +5 -0
package/src/session/agent-session.ts +82 -51
package/src/slash-commands/helpers/parse.ts +2 -1
package/src/tools/bash.ts +9 -0
package/src/tools/composer-bash-policy.ts +96 -0
package/src/tools/fetch.ts +94 -1
package/src/tools/index.ts +3 -0
package/src/tools/telegram-send.ts +137 -0
package/src/web/insane/bridge.ts +350 -0
package/src/web/insane/url-guard.ts +159 -0
package/src/web/scrapers/types.ts +143 -45
package/src/web/scrapers/utils.ts +70 -19
package/src/web/search/provider.ts +77 -18
package/src/web/search/providers/anthropic.ts +70 -3
package/src/web/search/providers/codex.ts +1 -119
package/src/web/search/providers/gemini.ts +99 -0
package/src/web/search/providers/insane.ts +551 -0
package/src/web/search/providers/openai-compatible.ts +66 -32
package/src/web/search/providers/text-citations.ts +111 -0
package/src/web/search/types.ts +13 -2
package/vendor/insane-search/LICENSE +21 -0
package/vendor/insane-search/MANIFEST.json +24 -0
package/vendor/insane-search/engine/__init__.py +23 -0
package/vendor/insane-search/engine/__main__.py +128 -0
package/vendor/insane-search/engine/bias_check.py +183 -0
package/vendor/insane-search/engine/executor.py +254 -0
package/vendor/insane-search/engine/fetch_chain.py +725 -0
package/vendor/insane-search/engine/learning.py +175 -0
package/vendor/insane-search/engine/phase0.py +214 -0
package/vendor/insane-search/engine/safety.py +91 -0
package/vendor/insane-search/engine/templates/package.json +11 -0
package/vendor/insane-search/engine/templates/playwright_mobile_chrome.js +188 -0
package/vendor/insane-search/engine/templates/playwright_real_chrome.js +243 -0
package/vendor/insane-search/engine/tests/test_hardening.py +57 -0
package/vendor/insane-search/engine/tests/test_smoke.py +152 -0
package/vendor/insane-search/engine/tests/test_u1.py +200 -0
package/vendor/insane-search/engine/tests/test_u4.py +131 -0
package/vendor/insane-search/engine/tests/test_u5.py +163 -0
package/vendor/insane-search/engine/tests/test_u7.py +124 -0
package/vendor/insane-search/engine/transport.py +211 -0
package/vendor/insane-search/engine/url_transforms.py +98 -0
package/vendor/insane-search/engine/validators.py +331 -0
package/vendor/insane-search/engine/waf_detector.py +214 -0
package/vendor/insane-search/engine/waf_profiles.yaml +162 -0

package/vendor/insane-search/engine/learning.py ADDED Viewed

@@ -0,0 +1,175 @@
+"""U5: lightweight per-host self-learning store (`observations/learned.json`).
+Records which fetch route (impersonate × referer × url-transform × phase) last
+SUCCEEDED for a host, so the next visit promotes it to the probe / front of the
+grid instead of rediscovering it from scratch. The store is bounded and
+self-pruning so it can never grow without limit:
+  * eviction on failure — a learned route that fails on a REAL block
+    (`exhausted` / `challenge` / `blocked`) earns a strike; after
+    ``EVICT_AFTER_FAILS`` consecutive real failures the entry is deleted.
+    Transient outcomes (429 rate-limit, network/unknown error, budget cut) and
+    URL-level outcomes (404/401) never strike — they are not the route's fault.
+  * TTL — an entry unused for ``TTL_DAYS`` is pruned the next time the store is
+    loaded (default 30 days).
+  * cap — at most ``MAX_ENTRIES`` (default 500); on overflow the
+    least-recently-used entries are dropped.
+This is a DATA file, never code, so the No-Site-Name Rule (R3) holds: per-site
+knowledge lives in JSON that both the engine and the agent can read, while the
+fetch chain itself stays site-agnostic.
+"""
+from __future__ import annotations
+import json
+import os
+from datetime import datetime, timezone, timedelta
+from typing import Optional
+from urllib.parse import urlsplit
+TTL_DAYS = int(os.environ.get("INSANE_LEARN_TTL_DAYS", "30"))
+MAX_ENTRIES = int(os.environ.get("INSANE_LEARN_MAX", "500"))
+EVICT_AFTER_FAILS = 2
+# stop_reason values that mean the bypass ROUTE genuinely failed (→ strike).
+# Everything else (rate_limited / unknown / budget / auth_required / not_found /
+# success / "") is transient or URL-level and never strikes the route.
+PENALIZE_REASONS = frozenset({"exhausted", "challenge", "blocked"})
+def enabled() -> bool:
+    return os.environ.get("INSANE_LEARN", "1") not in ("0", "false", "no")
+def default_path() -> str:
+    p = os.environ.get("INSANE_LEARNED_PATH")
+    if p:
+        return p
+    return os.path.join(os.path.expanduser("~"), ".insane_search", "learned.json")
+def is_real_failure(stop_reason: str) -> bool:
+    """True when `stop_reason` means the route itself was blocked (→ strike)."""
+    return (stop_reason or "") in PENALIZE_REASONS
+def key_for(url: str, device_class: str) -> str:
+    host = (urlsplit(url).netloc or "").lower()
+    dev = "mobile" if device_class == "mobile" else "desktop"
+    return f"{host}::{dev}"
+def _now() -> datetime:
+    return datetime.now(timezone.utc)
+def _parse(ts: str) -> Optional[datetime]:
+    try:
+        dt = datetime.fromisoformat(ts)
+        return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
+    except Exception:
+        return None
+def _prune(data: dict, now: Optional[datetime] = None) -> dict:
+    """Drop TTL-expired entries, then enforce the LRU cap. Pure (in-memory)."""
+    now = now or _now()
+    cutoff = now - timedelta(days=TTL_DAYS)
+    kept = {}
+    for k, v in data.items():
+        lu = _parse(v.get("last_used", "")) if isinstance(v, dict) else None
+        if lu is None or lu >= cutoff:
+            kept[k] = v
+    if len(kept) > MAX_ENTRIES:
+        # keep the MAX_ENTRIES most-recently-used
+        ordered = sorted(
+            kept.items(),
+            key=lambda kv: _parse(kv[1].get("last_used", "")) or now,
+            reverse=True,
+        )
+        kept = dict(ordered[:MAX_ENTRIES])
+    return kept
+def load(path: Optional[str] = None) -> dict:
+    """Load the store, pruning TTL-expired + over-cap entries in memory.
+    Pruning is not persisted here (write-on-read is wasteful); the next
+    `record_*` save writes the pruned set back, so the file converges."""
+    path = path or default_path()
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+        if not isinstance(data, dict):
+            return {}
+    except (FileNotFoundError, json.JSONDecodeError, OSError):
+        return {}
+    return _prune(data)
+def save(data: dict, path: Optional[str] = None) -> None:
+    path = path or default_path()
+    try:
+        os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
+        tmp = f"{path}.tmp"
+        with open(tmp, "w", encoding="utf-8") as f:
+            json.dump(data, f, ensure_ascii=False, indent=2, sort_keys=True)
+        os.replace(tmp, path)
+    except OSError:
+        pass  # learning is best-effort; never break a fetch on a write error
+def lookup(url: str, device_class: str, path: Optional[str] = None,
+           data: Optional[dict] = None) -> Optional[dict]:
+    """Return the learned route dict for this host, or None."""
+    data = load(path) if data is None else data
+    entry = data.get(key_for(url, device_class))
+    if isinstance(entry, dict):
+        route = entry.get("route")
+        if isinstance(route, dict):
+            return route
+    return None
+def record_success(url: str, device_class: str, route: dict,
+                   path: Optional[str] = None) -> None:
+    """Upsert the winning route for this host (resets the failure strike)."""
+    path = path or default_path()
+    data = load(path)
+    k = key_for(url, device_class)
+    now = _now().isoformat()
+    raw = data.get(k)
+    entry = raw if isinstance(raw, dict) else {}
+    same = entry.get("route") == route
+    data[k] = {
+        "route": route,
+        "wins": int(entry.get("wins", 0)) + 1 if same else 1,
+        "consecutive_fails": 0,
+        "last_used": now,
+        "last_success": now,
+    }
+    save(_prune(data), path)
+def record_failure(url: str, device_class: str, penalize: bool,
+                   path: Optional[str] = None) -> None:
+    """Record that the learned route did not win this run.
+    `penalize=True` (a real block) strikes the entry and deletes it after
+    EVICT_AFTER_FAILS consecutive strikes. `penalize=False` (transient / URL
+    issue) just refreshes `last_used` so an actively-retried host is not
+    TTL-pruned. No-op when nothing was learned for this host."""
+    path = path or default_path()
+    data = load(path)
+    k = key_for(url, device_class)
+    entry = data.get(k)
+    if not isinstance(entry, dict):
+        return
+    if penalize:
+        entry["consecutive_fails"] = int(entry.get("consecutive_fails", 0)) + 1
+        entry["last_used"] = _now().isoformat()
+        if entry["consecutive_fails"] >= EVICT_AFTER_FAILS:
+            del data[k]
+    else:
+        entry["last_used"] = _now().isoformat()
+    save(_prune(data), path)

package/vendor/insane-search/engine/phase0.py ADDED Viewed

@@ -0,0 +1,214 @@
+"""Phase 0 — official public-API router (the SANCTIONED exception to No-Site-Name).
+Per SKILL.md R5, platforms that publish official no-auth public endpoints get a
+deterministic route tried BEFORE the generic WAF grid. This is the *enforced,
+in-engine* version of what used to be agent-driven curl snippets in SKILL.md —
+so the agent can no longer silently skip it (which is exactly how Reddit/X were
+wrongly declared "blocked": the grid 403'd on `.json` and nobody tried `.rss`).
+This file is the ONLY engine/ module allowed to name platform hosts; it is
+exempted in `bias_check.EXPLICIT_ALLOW_FILES`. Do NOT add per-site logic to any
+other engine file — generic WAF handling stays site-agnostic.
+Contract:
+    route(url) -> Optional[dict]
+      None              → url is not a recognised Phase-0 platform; caller runs
+                          the generic grid as usual.
+      {"platform","ok","route","content","final_url","attempts":[...]}
+                        → recognised platform. `ok` says whether an official
+                          route succeeded. Even on ok=False the caller should
+                          fall through to the grid, but `attempts` is recorded
+                          so failure is never silent.
+Each attempt dict: {"route","platform","ok","status","bytes","note"}.
+"""
+from __future__ import annotations
+import re
+import subprocess
+from typing import Optional
+from urllib.parse import urlsplit
+# --- low-level helpers -------------------------------------------------------
+def _cffi_get(url: str, *, impersonate: str = "safari", timeout: int = 15):
+    """GET `url` with curl_cffi, following redirects by hand so that every hop
+    is checked with `safety.classify_url` before it is requested.
+    Returns the first response that is not a redirect (or a redirect response
+    that carries no Location header).
+    Raises RuntimeError with an ``ssrf_blocked:`` prefix when `url` itself is
+    rejected, ``ssrf_redirect_blocked:`` when a redirect target is rejected,
+    and ``too_many_redirects`` when the chain outlasts
+    ``safety.DEFAULT_MAX_REDIRECTS`` hops."""
+    from curl_cffi import requests as r  # lazy: engine works even if missing
+    from . import safety
+    allow_private = safety.allow_private_default()
+    # Validate the entry URL before any network traffic is generated.
+    ok, reason = safety.classify_url(url, allow_private)
+    if not ok:
+        raise RuntimeError(f"ssrf_blocked:{reason}")
+    headers = {
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+        "Accept-Language": "en-US,en;q=0.9,ko;q=0.8",
+    }
+    cur = url
+    # One request per iteration: the initial URL plus up to
+    # DEFAULT_MAX_REDIRECTS redirect targets.
+    for _ in range(safety.DEFAULT_MAX_REDIRECTS + 1):
+        resp = r.get(
+            cur,
+            impersonate=impersonate,  # type: ignore[arg-type]
+            timeout=timeout,
+            headers=headers,
+            # Redirects are resolved below so each target can be vetted first.
+            allow_redirects=False,
+        )
+        if not safety.is_redirect(resp):
+            return resp
+        loc = safety.location_of(resp)
+        if not loc:
+            # Redirect status without a Location header: hand it back as-is.
+            return resp
+        nxt = safety.resolve_redirect(cur, loc)
+        ok, reason = safety.classify_url(nxt, allow_private)
+        if not ok:
+            raise RuntimeError(f"ssrf_redirect_blocked:{reason}")
+        cur = nxt
+    raise RuntimeError("too_many_redirects")
+def _host(url: str) -> str:
+    h = (urlsplit(url).hostname or "").lower()
+    return h[4:] if h.startswith("www.") else h  # strip the literal "www." prefix only
+def _attempt(platform: str, route: str, ok: bool, status: int, body: str, note: str = "") -> dict:
+    return {"platform": platform, "route": route, "ok": ok, "status": status,
+            "bytes": len(body or ""), "note": note}
+# --- platform detectors ------------------------------------------------------
+def _detect(url: str) -> Optional[str]:
+    h = _host(url)
+    if not h:
+        return None
+    if "reddit.com" in h or h == "redd.it":
+        return "reddit"
+    if h in ("x.com", "twitter.com") or h.endswith(".x.com") or h.endswith(".twitter.com"):
+        return "x"
+    if "youtube.com" in h or h == "youtu.be":
+        return "youtube"
+    return None
+# --- reddit ------------------------------------------------------------------
+def _reddit(url: str, timeout: int) -> dict:
+    attempts: list[dict] = []
+    base = url.split("?", 1)[0].rstrip("/")
+    # Build an .rss / .json target from the path (works for /r/<sub> and post URLs).
+    rss_url = base + ("/.rss" if "/comments/" not in base else ".rss")
+    json_url = base + ("/.json" if "/comments/" not in base else ".json")
+    # Route 1: RSS (the route that actually survives — Reddit gates the JSON API).
+    try:
+        x = _cffi_get(rss_url, timeout=timeout)
+        ok = x.status_code == 200 and ("<rss" in x.text or "<feed" in x.text)
+        attempts.append(_attempt("reddit", "rss", ok, x.status_code, x.text,
+                                 "feed" if ok else "no-feed-markers"))
+        if ok:
+            return {"platform": "reddit", "ok": True, "route": "rss",
+                    "content": x.text, "final_url": rss_url, "attempts": attempts}
+    except Exception as e:
+        attempts.append(_attempt("reddit", "rss", False, 0, "", f"{type(e).__name__}"))
+    # Route 2: JSON via curl_cffi (often 403 now, but try — cheap).
+    try:
+        x = _cffi_get(json_url, timeout=timeout)
+        ok = x.status_code == 200 and x.text.lstrip().startswith(("{", "["))
+        attempts.append(_attempt("reddit", "json", ok, x.status_code, x.text,
+                                 "json" if ok else f"status={x.status_code}"))
+        if ok:
+            return {"platform": "reddit", "ok": True, "route": "json",
+                    "content": x.text, "final_url": json_url, "attempts": attempts}
+    except Exception as e:
+        attempts.append(_attempt("reddit", "json", False, 0, "", f"{type(e).__name__}"))
+    return {"platform": "reddit", "ok": False, "route": None, "content": "",
+            "final_url": url, "attempts": attempts}
+# --- x / twitter -------------------------------------------------------------
+# Captures the numeric id that follows "/status/" or "/statuses/" in a URL.
+_TWEET_ID_RE = re.compile(r"/status(?:es)?/(\d+)")
+def _x(url: str, timeout: int) -> dict:
+    """Phase-0 handler for x.com / twitter.com URLs.
+    URLs containing a status id try the ``tweet-result`` endpoint and then
+    ``oembed``. Any other URL is treated as a profile: the first path segment
+    is used as the screen name for the syndication timeline, which is requested
+    up to two times. Returns the Phase-0 result dict; ``attempts`` lists every
+    try, with the exception class name as the note when a request raised."""
+    attempts: list[dict] = []
+    m = _TWEET_ID_RE.search(url)
+    if m:  # single tweet → tweet-result + oembed (both no-auth, reliable)
+        tid = m.group(1)
+        try:
+            x = _cffi_get(f"https://cdn.syndication.twimg.com/tweet-result?id={tid}&token=a", timeout=timeout)
+            # Only parse the body on HTTP 200; anything else counts as "no text".
+            d = x.json() if x.status_code == 200 else {}
+            ok = bool(d.get("text"))
+            attempts.append(_attempt("x", "tweet-result", ok, x.status_code, x.text,
+                                     "has-text" if ok else f"status={x.status_code}"))
+            if ok:
+                return {"platform": "x", "ok": True, "route": "tweet-result",
+                        "content": x.text, "final_url": url, "attempts": attempts}
+        except Exception as e:
+            attempts.append(_attempt("x", "tweet-result", False, 0, "", f"{type(e).__name__}"))
+        # Fallback for a single tweet: the oEmbed endpoint.
+        try:
+            ourl = f"https://publish.twitter.com/oembed?url=https://twitter.com/i/status/{tid}&omit_script=1"
+            x = _cffi_get(ourl, timeout=timeout)
+            d = x.json() if x.status_code == 200 else {}
+            ok = bool(d.get("html"))
+            attempts.append(_attempt("x", "oembed", ok, x.status_code, x.text,
+                                     "has-html" if ok else f"status={x.status_code}"))
+            if ok:
+                return {"platform": "x", "ok": True, "route": "oembed",
+                        "content": x.text, "final_url": ourl, "attempts": attempts}
+        except Exception as e:
+            attempts.append(_attempt("x", "oembed", False, 0, "", f"{type(e).__name__}"))
+    else:  # profile timeline → syndication (rate-limit-prone; retry once)
+        handle = urlsplit(url).path.strip("/").split("/")[0]
+        # First path segments that are site sections rather than screen names.
+        _reserved = {"i", "search", "home", "explore", "messages", "notifications", "settings", "hashtag"}
+        if handle and handle.lower() not in _reserved:
+            surl = f"https://syndication.twitter.com/srv/timeline-profile/screen-name/{handle}"
+            # Two back-to-back tries; there is no delay between them.
+            for attempt_no in range(2):
+                try:
+                    x = _cffi_get(surl, timeout=timeout)
+                    ok = x.status_code == 200 and "__NEXT_DATA__" in x.text
+                    attempts.append(_attempt("x", f"syndication-timeline#{attempt_no+1}", ok,
+                                             x.status_code, x.text,
+                                             "timeline" if ok else f"status={x.status_code}"))
+                    if ok:
+                        return {"platform": "x", "ok": True, "route": "syndication-timeline",
+                                "content": x.text, "final_url": surl, "attempts": attempts}
+                except Exception as e:
+                    attempts.append(_attempt("x", f"syndication-timeline#{attempt_no+1}", False, 0, "", f"{type(e).__name__}"))
+    # Nothing succeeded (or the URL had no usable handle): report failure.
+    return {"platform": "x", "ok": False, "route": None, "content": "",
+            "final_url": url, "attempts": attempts}
+# --- youtube -----------------------------------------------------------------
+def _youtube(url: str, timeout: int) -> dict:
+    attempts: list[dict] = []
+    try:
+        p = subprocess.run(
+            ["yt-dlp", "--dump-json", "--skip-download", url],
+            capture_output=True, text=True, timeout=max(timeout, 60),
+        )
+        ok = p.returncode == 0 and p.stdout.strip().startswith("{")
+        note = "json" if ok else (p.stderr or "").strip()[:80]
+        attempts.append(_attempt("youtube", "yt-dlp", ok, 200 if ok else 0, p.stdout, note))
+        if ok:
+            return {"platform": "youtube", "ok": True, "route": "yt-dlp",
+                    "content": p.stdout, "final_url": url, "attempts": attempts}
+    except FileNotFoundError:
+        attempts.append(_attempt("youtube", "yt-dlp", False, 0, "", "yt-dlp not installed"))
+    except Exception as e:
+        attempts.append(_attempt("youtube", "yt-dlp", False, 0, "", f"{type(e).__name__}"))
+    return {"platform": "youtube", "ok": False, "route": None, "content": "",
+            "final_url": url, "attempts": attempts}
+_ROUTERS = {"reddit": _reddit, "x": _x, "youtube": _youtube}
+# --- public entrypoint -------------------------------------------------------
+def route(url: str, *, timeout: int = 15) -> Optional[dict]:
+    platform = _detect(url)
+    if platform is None:
+        return None
+    return _ROUTERS[platform](url, timeout)

package/vendor/insane-search/engine/safety.py ADDED Viewed

@@ -0,0 +1,91 @@
+"""SSRF / redirect safety guard for an agent-facing fetcher.
+curl_cffi follows redirects but does NOT validate the destination (confirmed
+against the official docs: there is no built-in private-IP/safe-redirect
+option). Since this engine fetches attacker-influenced URLs and follows their
+redirects, a hostile page could redirect to loopback, RFC-1918, link-local, or
+the cloud metadata endpoint (169.254.169.254) to exfiltrate internal data.
+This module provides a pure, deterministic classifier and a redirect resolver.
+Default-deny for private/internal targets; opt in with allow_private=True
+(env INSANE_ALLOW_PRIVATE=1) for local testing.
+"""
+from __future__ import annotations
+import ipaddress
+import os
+import socket
+from urllib.parse import urljoin, urlsplit
+ALLOWED_SCHEMES = {"http", "https"}
+DEFAULT_MAX_REDIRECTS = 10
+def allow_private_default() -> bool:
+    return os.environ.get("INSANE_ALLOW_PRIVATE", "") in ("1", "true", "yes")
+def _ip_blocked(ip_str: str) -> bool:
+    try:
+        ip = ipaddress.ip_address(ip_str)
+    except ValueError:
+        return False
+    return (ip.is_private or ip.is_loopback or ip.is_link_local
+            or ip.is_reserved or ip.is_multicast or ip.is_unspecified)
+def classify_url(url: str, allow_private: bool = False) -> tuple[bool, str]:
+    """(is_safe, reason). Blocks non-http(s) schemes and hosts that are — or
+    DNS-resolve to — private/loopback/link-local/reserved/metadata addresses.
+    Args:
+        url: the URL about to be requested (initial URL or redirect target).
+        allow_private: when True, the address checks are skipped; only the
+            scheme and the presence of a host are enforced.
+    Returns:
+        ``(True, reason)`` when the URL may be fetched, ``(False, reason)``
+        otherwise. `reason` is a short machine-readable tag such as
+        ``scheme:ftp``, ``no_host``, ``ip_blocked:<host>`` or
+        ``resolves_internal:<host>-><ip>``."""
+    try:
+        p = urlsplit(url)
+    except Exception as e:
+        return False, f"parse_error:{e}"
+    if p.scheme not in ALLOWED_SCHEMES:
+        return False, f"scheme:{p.scheme or 'none'}"
+    host = p.hostname
+    if not host:
+        return False, "no_host"
+    # The opt-out is honoured only after the scheme/host checks above.
+    if allow_private:
+        return True, "allow_private"
+    # IP literal host → check directly (covers cloud metadata, loopback, …)  # NOTE-BIAS-OK
+    try:
+        ipaddress.ip_address(host)
+        return (False, f"ip_blocked:{host}") if _ip_blocked(host) else (True, "public_ip")
+    except ValueError:
+        # Not an IP literal: fall through to DNS resolution.
+        pass
+    # Hostname → resolve and check every A/AAAA (DNS-rebinding defense).
+    try:
+        port = p.port or (443 if p.scheme == "https" else 80)
+        infos = socket.getaddrinfo(host, port, proto=socket.IPPROTO_TCP)
+        ips = {info[4][0] for info in infos}
+    except Exception:
+        # Don't hard-fail on resolver hiccups — the real request will error out
+        # naturally; we only need to stop redirects INTO internal space.
+        return True, "resolve_failed_allow"
+    # A single internal address among the results is enough to reject the host.
+    for ip in ips:
+        if _ip_blocked(str(ip)):
+            return False, f"resolves_internal:{host}->{ip}"
+    return True, "public"
+def location_of(resp) -> str | None:
+    """Case-insensitive Location header from a curl_cffi/requests response."""
+    try:
+        headers = {k.lower(): v for k, v in dict(getattr(resp, "headers", {}) or {}).items()}
+        return headers.get("location")
+    except Exception:
+        return None
+def is_redirect(resp) -> bool:
+    try:
+        return int(getattr(resp, "status_code", 0) or 0) in (301, 302, 303, 307, 308)
+    except Exception:
+        return False
+def resolve_redirect(base_url: str, location: str) -> str:
+    return urljoin(base_url, location)

package/vendor/insane-search/engine/templates/package.json ADDED Viewed

@@ -0,0 +1,11 @@
+{
+  "name": "insane-search-templates",
+  "version": "0.1.0",
+  "private": true,
+  "description": "Local deps for Playwright real-Chrome templates. npm install && npx playwright install chrome",
+  "dependencies": {
+    "playwright": "^1.58.2",
+    "playwright-extra": "^4.3.6",
+    "puppeteer-extra-plugin-stealth": "^2.11.2"
+  }
+}

package/vendor/insane-search/engine/templates/playwright_mobile_chrome.js ADDED Viewed

@@ -0,0 +1,188 @@
+#!/usr/bin/env node
+/**
+ * Generic Playwright mobile fetcher — real Chrome + device emulation.
+ *
+ * Usage:
+ *   echo '{"url":"...", "device":"iPhone 13 Pro"}' | node playwright_mobile_chrome.js
+ *
+ * Device name must match playwright `devices[...]` keys (Pixel 7, iPhone 13 Pro,
+ * iPad Pro 11, etc.). When in doubt, omit `device` — default is iPhone 13 Pro.
+ *
+ * NO-SITE-NAME RULE: same as playwright_real_chrome.js — no hostname branches.
+ */
+const dns = require('dns').promises;
+const net = require('net');
+function writeStdoutAsync(payload) {
+  return new Promise((resolve, reject) => {
+    process.stdout.write(payload, (err) => (err ? reject(err) : resolve()));
+  });
+}
+async function buildEnvelope(ctx, page, html, resp, automation) {
+  let cookies = [];
+  try { cookies = (await ctx.cookies()).map((c) => ({ name: c.name, value: c.value, domain: c.domain })); } catch (_e) {}
+  let userAgent = '';
+  try { userAgent = await page.evaluate(() => navigator.userAgent); } catch (_e) {}
+  let finalUrl = '';
+  try { finalUrl = page.url(); } catch (_e) {}
+  let status = 0;
+  try { status = resp ? resp.status() : 0; } catch (_e) {}
+  return JSON.stringify({ html, finalUrl, status, cookies, userAgent, automation });
+}
+class UnsafeUrlError extends Error {
+  constructor(reason) {
+    super(`unsafe_url:${reason}`);
+    this.name = 'UnsafeUrlError';
+  }
+}
+function isBlockedHostname(hostname) {
+  const h = (hostname || '').toLowerCase().replace(/\.$/, '');
+  return !h || h === 'localhost' || h.endsWith('.localhost') || h.endsWith('.local') || h.endsWith('.internal') || h.endsWith('.home.arpa');
+}
+function isPrivateIPv4(address) {
+  const parts = address.split('.').map((part) => Number.parseInt(part, 10));
+  if (parts.length !== 4 || parts.some((part) => !Number.isInteger(part) || part < 0 || part > 255)) return true;
+  const [a, b] = parts;
+  return a === 0 || a === 10 || a === 127 || (a === 100 && b >= 64 && b <= 127) ||
+    (a === 169 && b === 254) || (a === 172 && b >= 16 && b <= 31) ||
+    (a === 192 && (b === 0 || b === 168)) || (a === 198 && (b === 18 || b === 19 || b === 51)) ||
+    (a === 203 && b === 0) || a >= 224;
+}
+function normalizeIPv4MappedIPv6(address) {
+  const lower = address.toLowerCase();
+  return lower.startsWith('::ffff:') ? lower.slice(7) : lower;
+}
+function isPrivateIPv6(address) {
+  const lower = address.toLowerCase();
+  const mapped = normalizeIPv4MappedIPv6(lower);
+  if (mapped !== lower && net.isIP(mapped) === 4) return isPrivateIPv4(mapped);
+  return lower === '::' || lower === '::1' || lower.startsWith('fc') || lower.startsWith('fd') ||
+    lower.startsWith('fe8') || lower.startsWith('fe9') || lower.startsWith('fea') || lower.startsWith('feb') ||
+    lower.startsWith('ff') || lower.startsWith('2001:db8') || lower.startsWith('::ffff:');
+}
+function isPrivateOrSpecialAddress(address) {
+  const normalized = normalizeIPv4MappedIPv6(address);
+  const family = net.isIP(normalized);
+  if (family === 4) return isPrivateIPv4(normalized);
+  if (family === 6) return isPrivateIPv6(normalized);
+  if (net.isIP(address) === 6) return isPrivateIPv6(address);
+  return true;
+}
+async function assertPublicHttpUrl(rawUrl) {
+  let parsed;
+  try { parsed = new URL(rawUrl); } catch (_e) { throw new UnsafeUrlError('invalid_url'); }
+  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') throw new UnsafeUrlError(`scheme:${parsed.protocol || 'none'}`);
+  if (parsed.username || parsed.password) throw new UnsafeUrlError('credentials');
+  if (isBlockedHostname(parsed.hostname)) throw new UnsafeUrlError('internal_host');
+  if (net.isIP(parsed.hostname)) {
+    if (isPrivateOrSpecialAddress(parsed.hostname)) throw new UnsafeUrlError(`ip_blocked:${parsed.hostname}`);
+    return;
+  }
+  let records;
+  try { records = await dns.lookup(parsed.hostname, { all: true, verbatim: true }); }
+  catch (_e) { throw new UnsafeUrlError('resolve_failed'); }
+  if (!records.length) throw new UnsafeUrlError('resolve_empty');
+  const blocked = records.find((record) => isPrivateOrSpecialAddress(record.address));
+  if (blocked) throw new UnsafeUrlError(`resolves_internal:${parsed.hostname}->${blocked.address}`);
+}
+async function assertPagePublic(page, label) {
+  let current = '';
+  try { current = page.url(); } catch (_e) {}
+  await assertPublicHttpUrl(current);
+  return current;
+}
+async function readStdinJson() {
+  return await new Promise((resolve, reject) => {
+    let data = '';
+    process.stdin.on('data', (c) => (data += c));
+    process.stdin.on('end', () => {
+      try { resolve(JSON.parse(data || '{}')); }
+      catch (e) { reject(e); }
+    });
+    process.stdin.on('error', reject);
+  });
+}
+async function main() {
+  const args = await readStdinJson();
+  const url = args.url;
+  if (!url) { process.stderr.write('missing url\n'); process.exit(2); }
+  await assertPublicHttpUrl(url);
+  const profileDir = args.profileDir || '/tmp/.insane_pw_mobile_profile';
+  const deviceName = args.device || 'iPhone 13 Pro';
+  const waitSelector = args.waitSelector || null;
+  const timeoutMs = args.timeout || 60000;
+  const headless = args.headless ?? false;
+  let chromium, devices;
+  let automation = 'playwright';
+  try {
+    // Patchright drop-in (additive; absent → previous behaviour unchanged).
+    ({ chromium, devices } = require('patchright'));
+    automation = 'patchright';
+  } catch (_e0) {
+    try {
+      ({ chromium, devices } = require('playwright-extra'));
+      const stealth = require('puppeteer-extra-plugin-stealth')();
+      chromium.use(stealth);
+      automation = 'playwright-extra+stealth';
+    } catch (_e) {
+      ({ chromium, devices } = require('playwright'));
+      automation = 'playwright';
+    }
+  }
+  const dev = devices[deviceName];
+  if (!dev) {
+    process.stderr.write(`unknown device: ${deviceName}\n`);
+    process.exit(2);
+  }
+  let ctx;
+  try {
+    ctx = await chromium.launchPersistentContext(profileDir, {
+      channel: 'chrome',
+      headless,
+      ...dev,
+    });
+    const page = await ctx.newPage();
+    const deadline = Date.now() + timeoutMs;
+    const rem = (cap) => Math.max(1000, Math.min(cap || timeoutMs, deadline - Date.now()));
+    const mainResp = await page.goto(url, { waitUntil: 'domcontentloaded', timeout: rem(90000) });
+    await assertPagePublic(page, 'main');
+    if (waitSelector) {
+      try {
+        await page.waitForSelector(waitSelector, { timeout: rem(20000) });
+      } catch (_e) {}
+    }
+    await assertPagePublic(page, 'content');
+    const html = await page.content();
+    const payload = await buildEnvelope(ctx, page, html, mainResp, automation);
+    await writeStdoutAsync(payload);  // flush fully before any exit
+    process.exitCode = 0;
+    return;                           // let finally close ctx, then exit naturally
+  } catch (e) {
+    process.stderr.write(`${e.name || 'Error'}: ${e.message || e}\n`);
+    process.exitCode = 1;
+    return;
+  } finally {
+    try { if (ctx) await ctx.close(); } catch (_e) {}
+  }
+}
+main();