npm - ultimate-pi - Versions diffs - 0.19.0 → 0.19.1 - Mend

ultimate-pi 0.19.0 → 0.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

package/.agents/skills/web-retrieval/SKILL.md +163 -0
package/.agents/skills/wiki-autoresearch/SKILL.md +6 -6
package/.pi/SYSTEM.md +30 -12
package/.pi/agents/harness/planning/implementation-researcher.md +1 -1
package/.pi/agents/harness/planning/stack-researcher.md +5 -1
package/.pi/agents/harness/web-retrieval/web-answerer.md +35 -0
package/.pi/agents/harness/web-retrieval/web-criteria-verifier.md +28 -0
package/.pi/agents/harness/web-retrieval/web-gap-analyzer.md +31 -0
package/.pi/agents/harness/web-retrieval/web-query-expander-fast.md +34 -0
package/.pi/agents/harness/web-retrieval/web-query-expander.md +60 -0
package/.pi/agents/harness/web-retrieval/web-summarizer.md +18 -0
package/.pi/extensions/harness-web-guard.ts +2 -1
package/.pi/extensions/harness-web-tools.ts +689 -51
package/.pi/harness/agents.manifest.json +29 -5
package/.pi/harness/agents.policy.yaml +34 -0
package/.pi/harness/docs/adrs/0050-agentic-web-retrieval-stack.md +46 -0
package/.pi/harness/docs/harness-web-search.md +97 -0
package/.pi/harness/env.harness.template +9 -1
package/.pi/harness/examples/web-heuristic-angles.project.yaml +22 -0
package/.pi/harness/web-heuristic-angles.json +278 -0
package/.pi/harness/web-heuristic-angles.yaml +182 -0
package/.pi/lib/agents-policy.mjs +6 -0
package/.pi/lib/harness-subagent-auth.ts +39 -9
package/.pi/lib/harness-subagents-bridge.ts +21 -0
package/.pi/lib/harness-web/artifacts.ts +200 -0
package/.pi/lib/harness-web/cache.ts +369 -0
package/.pi/lib/harness-web/run-cli.ts +42 -2
package/.pi/prompts/harness-plan.md +1 -0
package/.pi/prompts/harness-setup.md +3 -1
package/.pi/scripts/gen-web-heuristic-angles-json.mjs +24 -0
package/.pi/scripts/harness-cli-verify.sh +5 -0
package/.pi/scripts/harness-verify.mjs +78 -0
package/.pi/scripts/harness-web-policy-guard.mjs +1 -1
package/.pi/scripts/harness-web.py +218 -15
package/.pi/scripts/harness_web/deep_search.py +55 -0
package/.pi/scripts/harness_web/evidence_bundle.py +47 -0
package/.pi/scripts/harness_web/find_similar.py +88 -0
package/.pi/scripts/harness_web/heuristic_angles_shipped.py +85 -0
package/.pi/scripts/harness_web/heuristic_config.py +251 -0
package/.pi/scripts/harness_web/highlights.py +47 -0
package/.pi/scripts/harness_web/multi_search.py +59 -0
package/.pi/scripts/harness_web/output.py +24 -0
package/.pi/scripts/harness_web/query_angles.py +116 -0
package/.pi/scripts/harness_web/rank.py +163 -0
package/.pi/scripts/harness_web/scrape.py +30 -0
package/.pi/scripts/tests/test_harness_web_heuristic_config.py +132 -0
package/.pi/scripts/tests/test_harness_web_query_angles.py +45 -0
package/.pi/scripts/tests/test_harness_web_rank.py +56 -0
package/AGENTS.md +2 -2
package/CHANGELOG.md +6 -0
package/package.json +5 -3
package/.agents/skills/scrapling-web/SKILL.md +0 -98
package/.pi/extensions/00-posthog-network-bootstrap.ts +0 -11
package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0

package/.pi/scripts/harness-web.py CHANGED Viewed

@@ -9,6 +9,7 @@ import shutil
 import sys
 import time
 from pathlib import Path
+from urllib.parse import urlparse
 # Re-exec with scrapling's uv-tool Python when the library is not on default python3.
 def _bootstrap_scrapling() -> None:
@@ -34,10 +35,28 @@ if str(SCRIPT_DIR) not in sys.path:
     sys.path.insert(0, str(SCRIPT_DIR))
 from harness_web.config import HarnessWebConfig, load_config  # noqa: E402
-from harness_web.output import write_search_results  # noqa: E402
-from harness_web.scrape import bulk_scrape, map_url, scrape_url  # noqa: E402
+from harness_web.deep_search import run_deep_search  # noqa: E402
+from harness_web.evidence_bundle import build_evidence_bundle, write_evidence_bundle  # noqa: E402
+from harness_web.find_similar import run_find_similar  # noqa: E402
+from harness_web.output import (  # noqa: E402
+    write_deep_search_results,
+    write_search_results,
+)
+from harness_web.scrape import (  # noqa: E402
+    bulk_scrape,
+    map_url,
+    scrape_url,
+    scrape_url_with_highlights,
+)
 from harness_web.search import search  # noqa: E402
+# Default result count per WRS tier, used when --limit is not passed on the CLI.
+TIER_LIMITS = {
+    "instant": 5,
+    "standard": 10,
+    "deep": 10,
+    "research": 15,
+}
 DEFAULT_WEB_DIR = ".web"
@@ -45,26 +64,153 @@ def _default_out(sub: str) -> Path:
     return Path(DEFAULT_WEB_DIR) / sub
+def _tier_limit(tier: str, cli_limit: int | None) -> int:
+    """Return the result cap: an explicit CLI limit wins, else the tier default (10 if the tier is unknown)."""
+    return TIER_LIMITS.get(tier, 10) if cli_limit is None else cli_limit
 def cmd_search(args: argparse.Namespace, config: HarnessWebConfig) -> int:
+    """Run a single SERP query and write the results as JSON; returns exit code 0."""
+    # Namespaces built without --tier (or with an empty value) fall back to "standard".
+    tier = getattr(args, "tier", None) or "standard"
+    # An explicit --limit overrides the per-tier default.
+    limit = _tier_limit(tier, args.limit)
     out = Path(args.output or _default_out("search.json"))
-    results = search(args.query, limit=args.limit, config=config)
-    write_search_results(out, results, args.query, engine=config.search_engine)
-    print(f"wrote {out} ({len(results)} results)")
+    results = search(args.query, limit=limit, config=config)
+    write_search_results(out, results, args.query, engine=config.search_engine, tier=tier)
+    print(f"wrote {out} ({len(results)} results, tier={tier})")
     return 0
-def cmd_scrape(args: argparse.Namespace, config: HarnessWebConfig) -> int:
-    out = Path(args.output or _default_out("page.md"))
-    fast = config.use_fast_for_url(args.url, args.fast)
-    scrape_url(
+def cmd_search_deep(args: argparse.Namespace, config: HarnessWebConfig) -> int:
+    """Run a multi-angle deep search and write the fused results as JSON.
+    The optional angles file, heuristic flag, category and both limits are
+    forwarded to ``run_deep_search``. Returns the process exit code (always 0).
+    """
+    out = Path(args.output or _default_out("search-deep.json"))
+    angles_path = None
+    if args.angles_file:
+        angles_path = Path(args.angles_file)
+    plan, ranked = run_deep_search(
+        args.query,
+        config=config,
+        angles_file=angles_path,
+        expand_heuristic=args.expand_heuristic,
+        category=args.category,
+        per_angle_limit=args.per_angle_limit,
+        final_limit=args.limit,
+    )
+    # Flatten the plan's angle objects into plain dicts for the JSON writer.
+    serialised_angles = []
+    for angle in plan.angles:
+        serialised_angles.append(
+            {"id": angle.id, "query": angle.query, "rationale": angle.rationale}
+        )
+    write_deep_search_results(
+        out,
+        query=args.query,
+        engine=config.search_engine,
+        tier="deep",
+        plan_angles=serialised_angles,
+        ranked_web=ranked,
+    )
+    print(f"wrote {out} ({len(ranked)} fused results, {len(plan.angles)} angles)")
+    return 0
+def cmd_find_similar(args: argparse.Namespace, config: HarnessWebConfig) -> int:
+    """Find pages similar to a seed URL and write them in the deep-search JSON layout; returns 0."""
+    # NOTE(review): the default path is the same one search-deep uses, so running
+    # both without -o overwrites the earlier result.
+    out = Path(args.output or _default_out("search-deep.json"))
+    plan, ranked = run_find_similar(
         args.url,
-        str(out),
         config=config,
-        fast=fast,
-        wait_ms=args.wait_for,
+        final_limit=args.limit,
+        per_angle_limit=args.per_angle_limit,
+        fast_fetch=args.fast,
+    )
+    # Flatten the plan's angle objects into plain dicts for the JSON writer.
+    angle_dicts = [
+        {"id": a.id, "query": a.query, "rationale": a.rationale} for a in plan.angles
+    ]
+    write_deep_search_results(
+        out,
+        query=plan.intent,
+        engine=config.search_engine,
+        tier="deep",
+        plan_angles=angle_dicts,
+        ranked_web=ranked,
     )
-    mode = "fast" if fast else "stealth"
-    print(f"wrote {out} ({mode})")
+    print(f"wrote {out} ({len(ranked)} similar results)")
+    return 0
+def cmd_scrape(args: argparse.Namespace, config: HarnessWebConfig) -> int:
+    """Scrape one URL to markdown, optionally extracting query-aligned highlights.
+    Highlights are produced only when ``--highlights`` is set AND
+    ``--highlight-query`` is non-blank; otherwise a plain scrape runs and, if
+    ``--highlights`` was requested, a warning is printed to stderr.
+    Returns the process exit code (always 0).
+    """
+    out = Path(args.output or _default_out("page.md"))
+    fast = config.use_fast_for_url(args.url, args.fast)
+    hl_query = (args.highlight_query or "").strip()
+    if args.highlights and hl_query:
+        hl_out = args.highlights_output or str(_default_out("highlights.json"))
+        scrape_url_with_highlights(
+            args.url,
+            str(out),
+            hl_out,
+            config=config,
+            fast=fast,
+            wait_ms=args.wait_for,
+            highlight_query=hl_query,
+        )
+        print(f"wrote {out} (highlights)")
+        return 0
+    if args.highlights:
+        # This used to degrade to a plain scrape without telling the caller.
+        print("scrape: --highlights ignored (no --highlight-query given)", file=sys.stderr)
+    scrape_url(
+        args.url,
+        str(out),
+        config=config,
+        fast=fast,
+        wait_ms=args.wait_for,
+    )
+    mode = "fast" if fast else "stealth"
+    print(f"wrote {out} ({mode})")
+    return 0
+def _batch_file_stem(url: str, used: set[str]) -> str:
+    """Return a file stem for ``url`` that is disk-safe and not yet used in this batch."""
+    # The first URL of a host keeps the historical "<host with dots as _>" name.
+    # Ports are flattened too, because ":" is not a valid filename character on
+    # every platform; URLs without a host fall back to "page".
+    base = urlparse(url).netloc.replace(".", "_").replace(":", "_") or "page"
+    stem = base
+    suffix = 2
+    while stem in used:
+        stem = f"{base}-{suffix}"
+        suffix += 1
+    used.add(stem)
+    return stem
+def cmd_contents_batch(args: argparse.Namespace, config: HarnessWebConfig) -> int:
+    """Scrape a batch of URLs to markdown files and write a ``manifest.json``.
+    URLs come from the positional arguments plus, when ``--from-search`` is
+    given, the ``data.web[].url`` entries of that JSON file; at most
+    ``--limit`` URLs are fetched, with the configured rate-limit pause between
+    fetches. Per-URL failures are recorded in the manifest instead of aborting.
+    Returns 1 when there is nothing to fetch, otherwise 0.
+    """
+    import json
+    out_dir = Path(args.output or _default_out("contents"))
+    out_dir.mkdir(parents=True, exist_ok=True)
+    urls: list[str] = list(args.urls or [])
+    if args.from_search:
+        data = json.loads(Path(args.from_search).read_text(encoding="utf-8"))
+        for item in data.get("data", {}).get("web", []):
+            u = (item.get("url") or "").strip()
+            if u:
+                urls.append(u)
+    if not urls:
+        print("contents-batch: no URLs", file=sys.stderr)
+        return 1
+    hl_query = (args.highlight_query or "").strip()
+    want_highlights = bool(args.highlights and hl_query)
+    manifest: list[dict] = []
+    highlight_files: dict[str, Path] = {}
+    used_stems: set[str] = set()
+    sleep_sec = config.rate_limit_ms / 1000.0
+    for i, url in enumerate(urls[: args.limit]):
+        if i and sleep_sec > 0:
+            time.sleep(sleep_sec)
+        # Naming by host alone made two URLs on one host overwrite each other.
+        stem = _batch_file_stem(url, used_stems)
+        md_path = out_dir / f"{stem}.md"
+        hl_path = out_dir / f"{stem}.highlights.json" if want_highlights else None
+        fast = config.use_fast_for_url(url, args.fast)
+        try:
+            if hl_path:
+                scrape_url_with_highlights(
+                    url,
+                    str(md_path),
+                    str(hl_path),
+                    config=config,
+                    fast=fast,
+                    wait_ms=None,
+                    highlight_query=hl_query,
+                )
+            else:
+                scrape_url(url, str(md_path), config=config, fast=fast, wait_ms=None)
+        except Exception as err:  # noqa: BLE001
+            # Deliberately broad: one bad page must not abort the whole batch.
+            manifest.append({"url": url, "ok": False, "error": str(err)})
+        else:
+            entry: dict = {"url": url, "markdown": str(md_path), "ok": True}
+            if hl_path:
+                entry["highlights"] = str(hl_path)
+                highlight_files[url] = hl_path
+            manifest.append(entry)
+    manifest_path = out_dir / "manifest.json"
+    manifest_path.write_text(json.dumps({"urls": manifest}, indent=2) + "\n", encoding="utf-8")
+    if args.evidence_bundle:
+        if args.from_search:
+            eb_path = Path(args.evidence_bundle)
+            # Hand over the highlight files written above so the bundle can embed them.
+            bundle = build_evidence_bundle(
+                Path(args.from_search),
+                highlight_files=highlight_files or None,
+                query=hl_query,
+            )
+            write_evidence_bundle(eb_path, bundle)
+            print(f"wrote {eb_path}")
+        else:
+            print("contents-batch: --evidence-bundle requires --from-search; skipped", file=sys.stderr)
+    print(f"wrote {len(manifest)} entries to {out_dir}")
     return 0
@@ -132,9 +278,41 @@ def build_parser() -> argparse.ArgumentParser:
     ps = sub.add_parser("search", help="Search via configured SERP (HARNESS_WEB_SEARCH_ENGINE)")
     ps.add_argument("query", help="Search query")
     ps.add_argument("-o", "--output", help="JSON output path (default: .web/search.json)")
-    ps.add_argument("--limit", type=int, default=5)
+    ps.add_argument("--limit", type=int, default=None)
+    ps.add_argument(
+        "--tier",
+        choices=("instant", "standard", "deep", "research"),
+        default="standard",
+        help="WRS tier (instant=5, standard=10 results)",
+    )
     ps.set_defaults(func=cmd_search)
+    pd = sub.add_parser("search-deep", help="Multi-angle SERP fusion (WRS deep)")
+    pd.add_argument("query", help="Original research intent")
+    pd.add_argument("-o", "--output", help="JSON output (default: .web/search-deep.json)")
+    pd.add_argument("--limit", type=int, default=10, help="Final fused result count")
+    pd.add_argument("--per-angle-limit", type=int, default=8, help="SERP hits per angle")
+    pd.add_argument(
+        "--angles-file",
+        metavar="YAML",
+        help="Angles from web-query-expander (.web/angles.yaml)",
+    )
+    pd.add_argument(
+        "--expand-heuristic",
+        action="store_true",
+        help="Emergency angle templates without expander subagent",
+    )
+    pd.add_argument("--category", help="Hint: code|company|people|paper|news")
+    pd.set_defaults(func=cmd_search_deep)
+    pf = sub.add_parser("find-similar", help="Pages similar to a seed URL")
+    pf.add_argument("url", help="Seed URL")
+    pf.add_argument("-o", "--output", help="JSON output (default: .web/search-deep.json)")
+    pf.add_argument("--limit", type=int, default=10)
+    pf.add_argument("--per-angle-limit", type=int, default=6)
+    pf.add_argument("--fast", action="store_true", help="Fast HTTP for seed fetch")
+    pf.set_defaults(func=cmd_find_similar)
     pc = sub.add_parser("scrape", help="Scrape a URL to markdown")
     pc.add_argument("url")
     pc.add_argument("-o", "--output", help="Markdown output (default: .web/page.md)")
@@ -150,8 +328,33 @@ def build_parser() -> argparse.ArgumentParser:
         metavar="MS",
         help="Extra wait after load (stealth mode, milliseconds)",
     )
+    pc.add_argument("--highlights", action="store_true", help="Extract query-aligned excerpts")
+    pc.add_argument("--highlight-query", help="Query for highlight scoring")
+    pc.add_argument(
+        "--highlights-output",
+        help="Highlights JSON path (default: .web/highlights.json)",
+    )
     pc.set_defaults(func=cmd_scrape)
+    pbatch = sub.add_parser("contents-batch", help="Batch scrape URLs to markdown manifest")
+    pbatch.add_argument("urls", nargs="*", help="URLs to fetch")
+    pbatch.add_argument("-o", "--output", help="Output directory (default: .web/contents)")
+    pbatch.add_argument("--limit", type=int, default=5)
+    pbatch.add_argument(
+        "--from-search",
+        metavar="JSON",
+        help="URLs from search.json or search-deep.json",
+    )
+    pbatch.add_argument("--fast", action="store_true")
+    pbatch.add_argument("--highlights", action="store_true")
+    pbatch.add_argument("--highlight-query", default="")
+    pbatch.add_argument(
+        "--evidence-bundle",
+        metavar="JSON",
+        help="Write evidence-bundle.json from --from-search",
+    )
+    pbatch.set_defaults(func=cmd_contents_batch)
     pb = sub.add_parser("bulk-scrape", help="Search then scrape multiple URLs")
     pb.add_argument("query", nargs="?", help="Search query when not using --from-search")
     pb.add_argument("-o", "--output", help="Output directory (default: .web/bulk)")

package/.pi/scripts/harness_web/deep_search.py ADDED Viewed

@@ -0,0 +1,55 @@
+"""WRS deep search orchestration."""
+from __future__ import annotations
+import os
+from pathlib import Path
+from .config import HarnessWebConfig
+from .multi_search import multi_search
+from .query_angles import AnglesPlan, resolve_angles
+from .rank import fuse_angle_results
+def _rerank_mode() -> str:
+    """Return the HARNESS_WEB_RERANK setting, trimmed and lower-cased; unrecognised values mean "off"."""
+    requested = os.environ.get("HARNESS_WEB_RERANK", "off").strip().lower()
+    return requested if requested in {"off", "lexical", "embed"} else "off"
+def run_deep_search(
+    query: str,
+    *,
+    config: HarnessWebConfig,
+    angles_file: Path | None = None,
+    expand_heuristic: bool = False,
+    category: str | None = None,
+    per_angle_limit: int = 8,
+    final_limit: int = 10,
+) -> tuple[AnglesPlan, list[dict]]:
+    """Resolve the search angles for ``query``, search each one, and fuse the hits.
+    ``final_limit`` and the HARNESS_WEB_RERANK mode are forwarded to the fusion
+    step. Returns the resolved plan and the fused hits as ``to_web_dict()`` dicts.
+    """
+    plan = resolve_angles(
+        query,
+        angles_file=angles_file,
+        expand_heuristic=expand_heuristic,
+        category=category,
+    )
+    hits_by_angle = multi_search(plan, per_angle_limit=per_angle_limit, config=config)
+    # Only these three fields go into fusion; any other per-hit keys are dropped.
+    kept_fields = ("url", "title", "description")
+    trimmed: dict[str, list[dict[str, str]]] = {
+        angle_id: [{name: hit.get(name, "") for name in kept_fields} for hit in hits]
+        for angle_id, hits in hits_by_angle.items()
+    }
+    fused = fuse_angle_results(
+        trimmed,
+        final_limit=final_limit,
+        intent=plan.intent,
+        rerank_mode=_rerank_mode(),
+    )
+    return plan, [hit.to_web_dict() for hit in fused]

package/.pi/scripts/harness_web/evidence_bundle.py ADDED Viewed

@@ -0,0 +1,47 @@
+"""Build evidence-bundle.json from search-deep + optional highlight fetches."""
+from __future__ import annotations
+import json
+from pathlib import Path
+from typing import Any
+def build_evidence_bundle(
+    search_deep_path: Path,
+    *,
+    highlight_files: dict[str, Path] | None = None,
+    query: str = "",
+) -> dict[str, Any]:
+    """Assemble an evidence bundle from a search results JSON file.
+    Args:
+        search_deep_path: JSON file whose hits live under ``data.web``.
+        highlight_files: Optional map of hit URL to a highlights JSON file;
+            readable, valid files are embedded under the hit's ``highlights`` key.
+        query: Fallback intent when the file has no (or an empty) ``query``.
+    Returns:
+        A dict with ``intent``, ``mode`` (default "deep"), ``engine`` and ``sources``.
+    Raises:
+        OSError: If ``search_deep_path`` cannot be read.
+        json.JSONDecodeError: If ``search_deep_path`` is not valid JSON.
+    """
+    data = json.loads(search_deep_path.read_text(encoding="utf-8"))
+    intent = data.get("query") or query
+    # ``"data": null`` and ``"web": null`` count as "no hits" rather than crashing.
+    hits = (data.get("data") or {}).get("web") or []
+    sources: list[dict[str, Any]] = []
+    for hit in hits:
+        if not isinstance(hit, dict):
+            # Malformed entry; nothing to extract from it.
+            continue
+        url = hit.get("url", "")
+        entry: dict[str, Any] = {
+            "url": url,
+            "title": hit.get("title", ""),
+            "description": hit.get("description", ""),
+            "score": hit.get("score"),
+            "angle_ids": hit.get("angle_ids", []),
+        }
+        highlight_path = highlight_files.get(url) if highlight_files else None
+        if highlight_path is not None:
+            try:
+                entry["highlights"] = json.loads(highlight_path.read_text(encoding="utf-8"))
+            except (OSError, ValueError):
+                # Highlights are best-effort: a missing, unreadable or corrupt
+                # file just leaves the source without a "highlights" key.
+                pass
+        sources.append(entry)
+    return {
+        "intent": intent,
+        "mode": data.get("mode", "deep"),
+        "engine": data.get("engine", ""),
+        "sources": sources,
+    }
+def write_evidence_bundle(path: Path, payload: dict[str, Any]) -> None:
+    """Write ``payload`` to ``path`` as indented UTF-8 JSON, creating parent directories first."""
+    target_dir = path.parent
+    target_dir.mkdir(parents=True, exist_ok=True)
+    serialised = json.dumps(payload, indent=2, ensure_ascii=False)
+    path.write_text(f"{serialised}\n", encoding="utf-8")

package/.pi/scripts/harness_web/find_similar.py ADDED Viewed

@@ -0,0 +1,88 @@
+"""Seed-URL discovery (Exa findSimilar analog)."""
+from __future__ import annotations
+import re
+from pathlib import Path
+from .config import HarnessWebConfig
+from .deep_search import run_deep_search
+from .query_angles import AnglesPlan, SearchAngle
+from .rank import RankedHit, fuse_angle_results, normalize_url, tokenize
+from .scrape import fetch_page
+def _extract_seed_phrases(url: str, *, config: HarnessWebConfig, fast: bool) -> list[str]:
+    """Fetch ``url`` and derive up to three search phrases from its title.
+    The title is the first non-empty ``<title>`` or ``<h1>`` text, else the
+    first 200 characters of page text. Phrases, in order: the title (max 120
+    chars), its five longest tokens, and a "similar to <title>" query (or
+    "related pages <url>" when no title could be found).
+    """
+    page = fetch_page(url, config=config, fast=fast, wait_ms=None)
+    title = ""
+    if hasattr(page, "css"):
+        for sel in ("title", "h1"):
+            nodes = page.css(sel)
+            if nodes:
+                title = (nodes[0].get_all_text(strip=True) or "").strip()
+                if title:
+                    break
+    if not title and hasattr(page, "get_all_text"):
+        title = (page.get_all_text(strip=True) or "")[:200].strip()
+    title = re.sub(r"\s+", " ", title).strip()
+    phrases: list[str] = []
+    if title:
+        phrases.append(title[:120])
+    # Longest tokens first, ties broken alphabetically. Sorting by length alone
+    # left equal-length tokens in the collection's own iteration order, which
+    # presumably is a set's (tokenize results are intersected with ``&``
+    # elsewhere in this module) and would then vary between runs.
+    tokens = sorted(tokenize(title), key=lambda tok: (-len(tok), tok))[:5]
+    if tokens:
+        phrases.append(" ".join(tokens))
+    phrases.append(f"similar to {title[:80]}" if title else f"related pages {url}")
+    return [p for p in phrases if p.strip()][:3]
+def run_find_similar(
+    seed_url: str,
+    *,
+    config: HarnessWebConfig,
+    final_limit: int = 10,
+    per_angle_limit: int = 6,
+    fast_fetch: bool = True,
+) -> tuple[AnglesPlan, list[dict]]:
+    """Search for pages similar to ``seed_url`` and return the plan plus ranked hits.
+    Each phrase derived from the seed page becomes one search angle. Fused hits
+    get up to +0.2 added to their score in proportion to how many seed tokens
+    appear in their title/description; the seed URL itself is excluded.
+    """
+    phrases = _extract_seed_phrases(seed_url, config=config, fast=fast_fetch)
+    angle_list = []
+    for index, phrase in enumerate(phrases, start=1):
+        angle_list.append(
+            SearchAngle(f"similar_{index}", phrase, f"Derived from seed {seed_url}")
+        )
+    plan = AnglesPlan(intent=f"pages similar to {seed_url}", angles=tuple(angle_list))
+    from .multi_search import multi_search
+    hits_by_angle = multi_search(plan, per_angle_limit=per_angle_limit, config=config)
+    kept_fields = ("url", "title", "description")
+    trimmed: dict[str, list[dict[str, str]]] = {
+        angle_id: [{name: hit.get(name, "") for name in kept_fields} for hit in hits]
+        for angle_id, hits in hits_by_angle.items()
+    }
+    # Ask fusion for twice the final count so re-scoring has candidates to reorder.
+    fused = fuse_angle_results(trimmed, final_limit=final_limit * 2, intent=plan.intent)
+    # Boost overlap with seed text
+    seed_key = normalize_url(seed_url)
+    seed_tokens = tokenize(" ".join(phrases))
+    token_count = max(len(seed_tokens), 1)
+    boosted: list[RankedHit] = []
+    for hit in fused:
+        if normalize_url(hit.url) == seed_key:
+            continue
+        text = f"{hit.title} {hit.description}".lower()
+        shared = len(seed_tokens & tokenize(text)) / token_count
+        boosted.append(
+            RankedHit(
+                url=hit.url,
+                title=hit.title,
+                description=hit.description,
+                score=hit.score + 0.2 * shared,
+                angle_ids=hit.angle_ids,
+                ranks=hit.ranks,
+            )
+        )
+    boosted.sort(key=lambda hit: -hit.score)
+    return plan, [hit.to_web_dict() for hit in boosted[:final_limit]]

package/.pi/scripts/harness_web/heuristic_angles_shipped.py ADDED Viewed

@@ -0,0 +1,85 @@
+"""Shipped WRS heuristic angles (stdlib-only). Keep in sync with web-heuristic-angles.yaml."""
+from __future__ import annotations
+from typing import Any
+# fmt: off
+# Built-in angle templates: "base" entries plus one list per category.
+# "{query}" is a placeholder, presumably replaced with the user's query by the
+# consumer of this table (confirm in heuristic_config / query_angles).
+SHIPPED_HEURISTIC_ANGLES: dict[str, Any] = {
+    "version": 1,
+    # NOTE(review): "code" alone lists 10 angles on top of the 2 base ones, so
+    # this cap is presumably enforced by truncation in the consumer; confirm.
+    "max_angles": 8,
+    "base": [
+        {"id": "definitional", "query": "{query}", "rationale": "Core intent phrasing"},
+        {
+            "id": "authoritative",
+            "query": "{query} official documentation OR specification OR RFC",
+            "rationale": "Primary specs and vendor docs",
+        },
+    ],
+    "categories": {
+        "code": [
+            {"id": "github", "query": "{query} site:github.com", "rationale": "Source, issues, discussions"},
+            {"id": "stackoverflow", "query": "{query} site:stackoverflow.com", "rationale": "Debugging and API usage Q&A"},
+            {"id": "stackexchange", "query": "{query} site:stackexchange.com", "rationale": "Broader SE network (Super User, Server Fault, etc.)"},
+            {"id": "readthedocs", "query": "{query} site:readthedocs.io", "rationale": "OSS library documentation"},
+            {"id": "mdn", "query": "{query} site:developer.mozilla.org", "rationale": "Web platform and browser APIs"},
+            {"id": "package_registries", "query": "{query} site:npmjs.com OR site:pypi.org OR site:pkg.go.dev OR site:crates.io", "rationale": "Package metadata across major ecosystems"},
+            {"id": "microsoft_learn", "query": "{query} site:learn.microsoft.com", "rationale": ".NET, Azure, Windows, and enterprise stacks"},
+            {"id": "hacker_news", "query": "{query} site:news.ycombinator.com", "rationale": "High-signal practitioner discussion"},
+            {"id": "gitlab", "query": "{query} site:gitlab.com", "rationale": "Alternate host and CI-visible code"},
+            {"id": "devto", "query": "{query} site:dev.to OR site:medium.com", "rationale": "Tutorials and implementation writeups"},
+        ],
+        "paper": [
+            {"id": "arxiv", "query": "{query} site:arxiv.org", "rationale": "Preprints and latest ML/CS uploads"},
+            {"id": "semantic_scholar", "query": "{query} site:semanticscholar.org", "rationale": "Citations, influences, and PDF links"},
+            {"id": "google_scholar", "query": "{query} site:scholar.google.com", "rationale": "Broad academic discovery"},
+            {"id": "papers_with_code", "query": "{query} site:paperswithcode.com", "rationale": "Benchmarks tied to implementations"},
+            {"id": "openreview", "query": "{query} site:openreview.net", "rationale": "Peer reviews and ML conference submissions"},
+            {"id": "acl_anthology", "query": "{query} site:aclanthology.org", "rationale": "NLP and computational linguistics"},
+            {"id": "acm_dl", "query": "{query} site:dl.acm.org", "rationale": "ACM proceedings and journals"},
+            {"id": "pubmed", "query": "{query} site:pubmed.ncbi.nlm.nih.gov", "rationale": "Biomedical and life-sciences literature"},
+        ],
+        "news": [
+            # NOTE(review): the years are hard-coded, so this angle goes stale;
+            # update it together with web-heuristic-angles.yaml.
+            {"id": "recent", "query": "{query} news 2025 2026", "rationale": "Recency-biased open web"},
+            {"id": "wire_reuters", "query": "{query} site:reuters.com", "rationale": "Wire-service reporting"},
+            {"id": "wire_ap", "query": "{query} site:apnews.com", "rationale": "Associated Press coverage"},
+            {"id": "tech_press", "query": "{query} site:techcrunch.com OR site:theverge.com OR site:arstechnica.com", "rationale": "Technology industry news"},
+            {"id": "business_press", "query": "{query} site:bloomberg.com OR site:ft.com OR site:wsj.com", "rationale": "Markets and business context"},
+            {"id": "analysis", "query": "{query} in-depth analysis explainer", "rationale": "Long-form journalism and explainers"},
+            {"id": "bbc", "query": "{query} site:bbc.com/news", "rationale": "International general news desk"},
+        ],
+        "company": [
+            {"id": "official_site", "query": "{query} official website", "rationale": "Company-controlled messaging"},
+            {"id": "crunchbase", "query": "{query} site:crunchbase.com", "rationale": "Funding, investors, and competitors"},
+            {"id": "linkedin_company", "query": "{query} site:linkedin.com/company", "rationale": "Headcount, hiring, and positioning"},
+            {"id": "sec_filings", "query": "{query} site:sec.gov 10-K OR 10-Q OR S-1", "rationale": "US public-company disclosures"},
+            {"id": "g2_reviews", "query": "{query} site:g2.com OR site:capterra.com", "rationale": "B2B software reviews and comparisons"},
+            {"id": "company_news", "query": "{query} company announcement press release", "rationale": "Launches, partnerships, and earnings"},
+            {"id": "glassdoor", "query": "{query} site:glassdoor.com", "rationale": "Employee sentiment and culture signals"},
+        ],
+        "people": [
+            {"id": "linkedin", "query": "{query} site:linkedin.com/in", "rationale": "Professional profiles"},
+            {"id": "github_person", "query": "{query} site:github.com", "rationale": "Open-source footprint for builders"},
+            {"id": "wikipedia", "query": "{query} site:en.wikipedia.org", "rationale": "Neutral biographical baseline"},
+            {"id": "scholar_person", "query": "{query} site:scholar.google.com", "rationale": "Publication record for researchers"},
+            {"id": "interviews", "query": "{query} interview podcast keynote", "rationale": "First-person statements and talks"},
+            {"id": "twitter_x", "query": "{query} site:x.com OR site:twitter.com", "rationale": "Public statements and discourse"},
+        ],
+        "security": [
+            {"id": "cve_nvd", "query": "{query} CVE site:nvd.nist.gov", "rationale": "National Vulnerability Database"},
+            {"id": "owasp", "query": "{query} site:owasp.org", "rationale": "AppSec standards and cheat sheets"},
+            {"id": "cwe", "query": "{query} site:cwe.mitre.org", "rationale": "Weakness taxonomy"},
+            {"id": "github_advisories", "query": "{query} site:github.com/advisories OR dependabot", "rationale": "Ecosystem security advisories"},
+            {"id": "snyk_blog", "query": "{query} site:snyk.io/blog OR vulnerability", "rationale": "Practitioner security writeups"},
+        ],
+        # Angles listed under the "default" key (see the hn_default rationale:
+        # "when category unknown").
+        "default": [
+            {"id": "technical", "query": "{query} how it works architecture internals", "rationale": "Mechanism and design"},
+            {"id": "criticism", "query": "{query} limitations criticism drawbacks", "rationale": "Counterpoints and failure modes"},
+            {"id": "wikipedia", "query": "{query} site:en.wikipedia.org", "rationale": "Structured overview"},
+            {"id": "comparison", "query": "{query} vs alternatives comparison benchmark", "rationale": "Competitive landscape"},
+            {"id": "reddit", "query": "{query} site:reddit.com", "rationale": "Community experience reports"},
+            {"id": "hn_default", "query": "{query} site:news.ycombinator.com", "rationale": "Practitioner threads when category unknown"},
+        ],
+    },
+}
+# fmt: on