eightstatecli 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eightstatecli-0.4.0.dist-info/METADATA +177 -0
- eightstatecli-0.4.0.dist-info/RECORD +18 -0
- eightstatecli-0.4.0.dist-info/WHEEL +4 -0
- eightstatecli-0.4.0.dist-info/entry_points.txt +2 -0
- eightstatecli-0.4.0.dist-info/licenses/LICENSE +21 -0
- escli/__init__.py +837 -0
- escli/__main__.py +5 -0
- escli/commands/__init__.py +0 -0
- escli/commands/audio.py +438 -0
- escli/commands/docs.py +354 -0
- escli/commands/research.py +597 -0
- escli/commands/search.py +286 -0
- escli/commands/social.py +243 -0
- escli/commands/usage.py +428 -0
- escli/services/__init__.py +0 -0
- escli/services/credentials.py +117 -0
- escli/services/describe.py +186 -0
- escli/services/output.py +168 -0
|
@@ -0,0 +1,597 @@
|
|
|
1
|
+
"""
|
|
2
|
+
escli research — web research tasks via Parallel Task API.
|
|
3
|
+
|
|
4
|
+
Supports the full Task API surface: deep research, data enrichment,
|
|
5
|
+
structured output, source policies, interactions, and all processors.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
escli research "query" -o report.md Deep research → markdown
|
|
9
|
+
escli research "query" -o report.md -p ultra Higher quality processor
|
|
10
|
+
escli research "query" --schema schema.json -o out.json Structured JSON output
|
|
11
|
+
escli research "query" --include-domains sec.gov,wsj.com Only use specific sources
|
|
12
|
+
escli research status <run-id> Check task status
|
|
13
|
+
escli research result <run-id> -o output.md Fetch completed result
|
|
14
|
+
|
|
15
|
+
Processors (ascending quality/cost/time):
|
|
16
|
+
lite, base, core, core2x, pro, ultra, ultra2x, ultra4x, ultra8x
|
|
17
|
+
Add -fast suffix for speed-optimized variants
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import argparse
|
|
21
|
+
import json
|
|
22
|
+
import os
|
|
23
|
+
import pathlib
|
|
24
|
+
import sys
|
|
25
|
+
import time
|
|
26
|
+
import urllib.request
|
|
27
|
+
import urllib.error
|
|
28
|
+
from datetime import datetime, timezone
|
|
29
|
+
|
|
30
|
+
from ..services.credentials import get_key_for_service
|
|
31
|
+
|
|
32
|
+
# Root URL for all Parallel API requests.
API_BASE = "https://api.parallel.ai"
# Beta-feature header value that enables server-sent task events (SSE).
SSE_BETA = "events-sse-2025-07-24"
MAX_POLL_WAIT = 7200  # 2 hours (ultra8x can take up to 2h)
POLL_INTERVAL = 10  # seconds between status checks in the polling fallback

# Valid --processor choices, in ascending quality/cost/time order.
# "-fast" variants trade data freshness for 2-5x lower latency.
PROCESSORS = [
    "lite", "base", "core", "core2x", "pro", "ultra",
    "ultra2x", "ultra4x", "ultra8x",
    "lite-fast", "base-fast", "core-fast", "core2x-fast",
    "pro-fast", "ultra-fast", "ultra2x-fast", "ultra4x-fast", "ultra8x-fast",
]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _get_api_key() -> str:
    """Resolve the Parallel API key or abort the process.

    Looks the key up via the shared credentials service (env var
    PARALLEL_API_KEY or the stored credential). Exits with status 1
    and a stderr message when no key is configured.
    """
    api_key = get_key_for_service("parallel", "PARALLEL_API_KEY")
    if api_key:
        return api_key
    print(" ✗ no Parallel API key. Set PARALLEL_API_KEY or add one via the dashboard.", file=sys.stderr)
    sys.exit(1)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _api_request(method: str, path: str, api_key: str,
                 body: dict | None = None, extra_headers: dict | None = None) -> dict:
    """Perform a JSON request against the Parallel API and decode the reply.

    Args:
        method: HTTP verb ("GET", "POST", ...).
        path: API path appended to API_BASE (must start with "/").
        api_key: value sent in the ``x-api-key`` header.
        body: optional JSON-serializable request body.
        extra_headers: optional headers merged over the defaults.

    Returns:
        The decoded JSON response body.

    Never raises to callers: exits the process with status 1 on auth
    failure (401) and 2 on any other HTTP or network error.
    """
    url = f"{API_BASE}{path}"
    hdrs = {"x-api-key": api_key, "Content-Type": "application/json"}
    if extra_headers:
        hdrs.update(extra_headers)

    # BUG FIX: the previous truthiness check (`if body`) silently dropped
    # an explicitly provided empty-dict body; only None means "no body".
    data = json.dumps(body).encode() if body is not None else None
    req = urllib.request.Request(url, data=data, headers=hdrs, method=method)

    try:
        with urllib.request.urlopen(req, timeout=60) as resp:
            return json.loads(resp.read().decode())
    except urllib.error.HTTPError as e:
        # Prefer the API's structured error message; fall back to the
        # exception text when the error body is not JSON.
        try:
            err_body = json.loads(e.read().decode())
        except Exception:
            err_body = {"error": {"message": str(e)}}

        msg = err_body.get("error", {}).get("message", str(e))
        if e.code == 401:
            print(f" ✗ auth failed (401): {msg}", file=sys.stderr)
            sys.exit(1)
        elif e.code == 429:
            print(f" ✗ rate limited (429): {msg}", file=sys.stderr)
            sys.exit(2)
        else:
            print(f" ✗ API error ({e.code}): {msg}", file=sys.stderr)
            sys.exit(2)
    except urllib.error.URLError as e:
        print(f" ✗ network error: {e.reason}", file=sys.stderr)
        sys.exit(2)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# ── SSE streaming ────────────────────────────────────────────────
|
|
84
|
+
|
|
85
|
+
def _stream_sse(api_key: str, run_id: str, quiet: bool = False) -> dict | None:
    """Stream task-run events over SSE until a terminal state is seen.

    Returns:
        The run's output dict when a ``task_run.state`` event reports
        "completed", or None when the stream cannot be established or
        ends without a terminal event (the caller falls back to polling).

    Exits the process with status 2 when the task reports "failed".
    """
    url = f"{API_BASE}/v1beta/tasks/runs/{run_id}/events"
    # Progress lines already printed, keyed by (kind, message) — keeps
    # reconnect attempts from re-printing updates the user has seen.
    seen = set()

    # Up to 25 (re)connect attempts: a stream that drops mid-read falls
    # through to the next attempt below.
    for attempt in range(25):
        if attempt > 0 and not quiet:
            print(f" [sse] reconnecting ({attempt + 1})...", file=sys.stderr)

        req = urllib.request.Request(url, headers={
            "x-api-key": api_key,
            "Accept": "text/event-stream",
            "parallel-beta": SSE_BETA,  # opt in to the events beta
        })

        try:
            resp = urllib.request.urlopen(req, timeout=600)
        except Exception as e:
            # NOTE(review): a failed *connect* aborts streaming entirely
            # (returns None so the caller polls); only mid-stream drops
            # are retried — confirm this asymmetry is intended.
            if not quiet:
                print(f" [sse] failed: {e}", file=sys.stderr)
            return None

        try:
            for raw_line in resp:
                # SSE framing: only "data: ..." lines carry JSON payloads.
                line = raw_line.decode("utf-8", errors="replace").rstrip("\n\r")
                if not line.startswith("data: "):
                    continue
                try:
                    event = json.loads(line[6:])
                except json.JSONDecodeError:
                    continue  # tolerate partial/garbled frames

                etype = event.get("type", "")

                if etype == "task_run.progress_msg.exec_status" and not quiet:
                    msg = event.get("message", "")
                    if ("progress", msg) not in seen:
                        seen.add(("progress", msg))
                        print(f" [progress] {msg}", file=sys.stderr)

                elif etype == "task_run.progress_stats" and not quiet:
                    meter = event.get("progress_meter", "")
                    if ("stats", str(meter)) not in seen:
                        seen.add(("stats", str(meter)))
                        stats = event.get("source_stats", {})
                        read = stats.get("num_sources_read", 0)
                        print(f" [progress] {meter}% — {read} sources read", file=sys.stderr)

                elif etype == "task_run.state":
                    # Terminal-state event: return output or fail hard.
                    run = event.get("run", {})
                    status = run.get("status", "")
                    if status == "completed":
                        return event.get("output")
                    elif status == "failed":
                        print(f" ✗ task failed: {run.get('error', '')}", file=sys.stderr)
                        sys.exit(2)

                elif etype == "error":
                    return None  # server-reported stream error → poll fallback
        except Exception:
            # Mid-stream read error: swallow and reconnect on the next
            # loop iteration; `seen` prevents duplicate progress output.
            pass
        finally:
            resp.close()

    return None  # retries exhausted without a terminal event
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _poll_until_complete(api_key: str, run_id: str, quiet: bool = False) -> dict:
    """Poll a task run until it finishes and return its output.

    Fallback path used when SSE streaming is unavailable. Exits with
    status 2 on failure/cancellation and 3 when MAX_POLL_WAIT elapses.
    """
    deadline = time.time() + MAX_POLL_WAIT
    while time.time() < deadline:
        run_info = _api_request("GET", f"/v1/tasks/runs/{run_id}", api_key)
        current = run_info.get("status", "")
        if not quiet:
            print(f" [poll] status={current}", file=sys.stderr)

        if current == "completed":
            # Completed runs expose their payload on the /result endpoint.
            final = _api_request("GET", f"/v1/tasks/runs/{run_id}/result", api_key)
            return final.get("output")
        if current in ("failed", "cancelled"):
            print(f" ✗ task {current}: {run_info.get('error', '')}", file=sys.stderr)
            sys.exit(2)

        time.sleep(POLL_INTERVAL)

    print(" ✗ timed out.", file=sys.stderr)
    sys.exit(3)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
# ── Markdown formatting ──────────────────────────────────────────
|
|
172
|
+
|
|
173
|
+
def _format_markdown(query: str, processor: str, run_id: str, output: dict | None,
|
|
174
|
+
created_at: str, include_basis: bool = True) -> str:
|
|
175
|
+
now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
176
|
+
lines = [
|
|
177
|
+
"---", f'query: "{query}"', f"processor: {processor}",
|
|
178
|
+
f"run_id: {run_id}", f"created_at: {created_at}", f"retrieved_at: {now}",
|
|
179
|
+
"---", "", f"# Research: {query}", "",
|
|
180
|
+
]
|
|
181
|
+
|
|
182
|
+
if output is None:
|
|
183
|
+
lines.append("*No output returned.*")
|
|
184
|
+
return "\n".join(lines)
|
|
185
|
+
|
|
186
|
+
content = output.get("content")
|
|
187
|
+
basis = output.get("basis", [])
|
|
188
|
+
|
|
189
|
+
if isinstance(content, str):
|
|
190
|
+
# Could be JSON string or plain text
|
|
191
|
+
try:
|
|
192
|
+
parsed = json.loads(content)
|
|
193
|
+
if isinstance(parsed, dict):
|
|
194
|
+
_render_dict(lines, parsed)
|
|
195
|
+
else:
|
|
196
|
+
lines.append(content)
|
|
197
|
+
except (json.JSONDecodeError, TypeError):
|
|
198
|
+
lines.append(content)
|
|
199
|
+
elif isinstance(content, dict):
|
|
200
|
+
_render_dict(lines, content)
|
|
201
|
+
elif content is not None:
|
|
202
|
+
lines.append(str(content))
|
|
203
|
+
|
|
204
|
+
if include_basis and basis:
|
|
205
|
+
lines.extend(["", "## Research Basis", ""])
|
|
206
|
+
for entry in basis:
|
|
207
|
+
field = entry.get("field", "unknown")
|
|
208
|
+
reasoning = entry.get("reasoning", "")
|
|
209
|
+
confidence = entry.get("confidence", "")
|
|
210
|
+
citations = entry.get("citations", [])
|
|
211
|
+
lines.append(f"### {field}")
|
|
212
|
+
if confidence:
|
|
213
|
+
lines.append(f"**Confidence:** {confidence}")
|
|
214
|
+
if reasoning:
|
|
215
|
+
lines.append(f"\n{reasoning}")
|
|
216
|
+
if citations:
|
|
217
|
+
lines.append("")
|
|
218
|
+
for cite in citations:
|
|
219
|
+
url = cite.get("url", "")
|
|
220
|
+
title = cite.get("title", url)
|
|
221
|
+
lines.append(f"- [{title}]({url})")
|
|
222
|
+
for exc in cite.get("excerpts", []):
|
|
223
|
+
lines.append(f" > {exc}")
|
|
224
|
+
lines.append("")
|
|
225
|
+
|
|
226
|
+
return "\n".join(lines)
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def _render_dict(lines: list[str], d: dict):
|
|
230
|
+
for key, value in d.items():
|
|
231
|
+
lines.append(f"## {key.replace('_', ' ').title()}")
|
|
232
|
+
lines.append("")
|
|
233
|
+
if isinstance(value, str):
|
|
234
|
+
lines.append(value)
|
|
235
|
+
elif isinstance(value, list):
|
|
236
|
+
if value and all(isinstance(i, dict) for i in value):
|
|
237
|
+
_render_table(lines, value)
|
|
238
|
+
else:
|
|
239
|
+
for item in value:
|
|
240
|
+
if isinstance(item, dict):
|
|
241
|
+
for k, v in item.items():
|
|
242
|
+
lines.append(f"**{k.replace('_', ' ').title()}:** {v}")
|
|
243
|
+
lines.append("")
|
|
244
|
+
else:
|
|
245
|
+
lines.append(f"- {item}")
|
|
246
|
+
elif isinstance(value, dict):
|
|
247
|
+
for k, v in value.items():
|
|
248
|
+
lines.append(f"**{k.replace('_', ' ').title()}:** {v}")
|
|
249
|
+
else:
|
|
250
|
+
lines.append(str(value))
|
|
251
|
+
lines.append("")
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def _render_table(lines: list[str], items: list[dict]):
|
|
255
|
+
keys = []
|
|
256
|
+
for item in items:
|
|
257
|
+
for k in item:
|
|
258
|
+
if k not in keys:
|
|
259
|
+
keys.append(k)
|
|
260
|
+
headers = [k.replace("_", " ").title() for k in keys]
|
|
261
|
+
lines.append("| " + " | ".join(headers) + " |")
|
|
262
|
+
lines.append("| " + " | ".join("---" for _ in keys) + " |")
|
|
263
|
+
for item in items:
|
|
264
|
+
row = [str(item.get(k, "")).replace("\n", " ").replace("|", "\\|") for k in keys]
|
|
265
|
+
lines.append("| " + " | ".join(row) + " |")
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
# ── Commands ─────────────────────────────────────────────────────
|
|
269
|
+
|
|
270
|
+
def cmd_run(args):
    """Create and execute a research task end-to-end.

    Builds the Task API request from the CLI flags, submits it, streams
    SSE progress (falling back to polling when streaming is unavailable),
    then writes/prints the result as JSON or markdown.

    Returns:
        0 on success, 1 on invalid local input (bad JSON, missing file).
    """
    api_key = _get_api_key()
    query = " ".join(args.query)
    processor = args.processor

    if not args.quiet:
        print(f" ▸ submitting task (processor={processor})...", file=sys.stderr)

    # Build the request body; enable_events opts into SSE progress.
    body: dict = {
        "processor": processor,
        "input": query,
        "enable_events": True,
    }

    # Input: a JSON object from --input-json or --input-file replaces the
    # plain-text query as the task input (file wins if both are given).
    if getattr(args, "input_json", None):
        try:
            body["input"] = json.loads(args.input_json)
        except json.JSONDecodeError:
            print(" ✗ --input-json must be valid JSON", file=sys.stderr); return 1

    if getattr(args, "input_file", None):
        path = pathlib.Path(args.input_file)
        if not path.exists():
            print(f" ✗ input file not found: {path}", file=sys.stderr); return 1
        # NOTE(review): invalid JSON here raises an unhandled
        # JSONDecodeError rather than printing a friendly error.
        body["input"] = json.loads(path.read_text())

    # Output schema: file-based JSON Schema > inline description > text.
    if getattr(args, "schema", None):
        schema_path = pathlib.Path(args.schema)
        if not schema_path.exists():
            print(f" ✗ schema file not found: {schema_path}", file=sys.stderr); return 1
        schema_data = json.loads(schema_path.read_text())
        body["task_spec"] = {"output_schema": {"type": "json", "json_schema": schema_data}}
    elif getattr(args, "output_schema", None):
        body["task_spec"] = {"output_schema": args.output_schema}
    elif getattr(args, "text", False):
        body["task_spec"] = {"output_schema": {"type": "text"}}
    # else: auto (default for pro+ processors)

    # Source policy: domain allow/deny lists and a freshness cutoff.
    source_policy = {}
    if getattr(args, "include_domains", None):
        source_policy["include_domains"] = [d.strip() for d in args.include_domains.split(",")]
    if getattr(args, "exclude_domains", None):
        source_policy["exclude_domains"] = [d.strip() for d in args.exclude_domains.split(",")]
    if getattr(args, "after_date", None):
        source_policy["after_date"] = args.after_date
    if source_policy:
        body["source_policy"] = source_policy

    # Advanced settings
    if getattr(args, "location", None):
        body["advanced_settings"] = {"location": args.location}

    # Metadata: accept a JSON object, falling back to key=val,key=val pairs.
    if getattr(args, "metadata", None):
        try:
            body["metadata"] = json.loads(args.metadata)
        except json.JSONDecodeError:
            # Parse key=value pairs
            meta = {}
            for pair in args.metadata.split(","):
                if "=" in pair:
                    k, v = pair.split("=", 1)
                    meta[k.strip()] = v.strip()
            body["metadata"] = meta

    # Follow-up on a previous interaction (threads the conversation).
    if getattr(args, "follow_up", None):
        body["previous_interaction_id"] = args.follow_up

    # Submit; the beta header enables the SSE events endpoint for this run.
    headers = {"parallel-beta": SSE_BETA}
    task = _api_request("POST", "/v1/tasks/runs", api_key, body=body, extra_headers=headers)
    run_id = task["run_id"]
    created_at = task.get("created_at", "")

    if not args.quiet:
        print(f" · run_id: {run_id}", file=sys.stderr)

    # Stream progress + wait for result
    if not args.quiet:
        print(" ░░░░░░░░░░░░░░░░░░░░ researching...", file=sys.stderr)

    output = _stream_sse(api_key, run_id, args.quiet)
    if output is None:
        # SSE unavailable or ended without a terminal event — poll instead.
        if not args.quiet:
            print(" [fallback] polling...", file=sys.stderr)
        output = _poll_until_complete(api_key, run_id, args.quiet)

    # Emit the result: machine-readable JSON or rendered markdown.
    out_path = pathlib.Path(args.output) if args.output else None

    if args.json:
        result = {
            "success": True, "run_id": run_id, "processor": processor,
            "output": output,
        }
        if out_path:
            result["path"] = str(out_path.resolve())
        text = json.dumps(result, indent=2)
        if out_path:
            out_path.parent.mkdir(parents=True, exist_ok=True)
            out_path.write_text(text, encoding="utf-8")
        print(text)
    else:
        md = _format_markdown(query, processor, run_id, output, created_at,
                              include_basis=not getattr(args, "no_basis", False))
        if out_path:
            out_path.parent.mkdir(parents=True, exist_ok=True)
            out_path.write_text(md, encoding="utf-8")
            if not args.quiet:
                print(f"\n ✓ {out_path}", file=sys.stderr)
            if args.quiet:
                # Quiet mode still prints the written path for scripting.
                print(str(out_path.resolve()))
        else:
            print(md)

    return 0
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def cmd_status(args):
    """Print the current status of a task run (JSON or human-readable)."""
    api_key = _get_api_key()
    run_info = _api_request("GET", f"/v1/tasks/runs/{args.run_id}", api_key)

    if args.json:
        print(json.dumps({"success": True, **run_info}))
        return 0

    print(f" {args.run_id}: {run_info.get('status', 'unknown')}")
    if run_info.get("error"):
        print(f" error: {run_info['error']}")
    for warning in run_info.get("warnings") or []:
        print(f" warning: {warning.get('message', '')}")
    return 0
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
def cmd_processors(args):
    """List available processor tiers, as a table or as JSON."""
    # (name, latency range, strengths, approx. max output fields)
    standard_tiers = [
        ("lite", "10s – 60s", "Basic metadata, low latency", "~2"),
        ("base", "15s – 100s", "Reliable standard enrichments", "~5"),
        ("core", "60s – 5min", "Cross-referenced, moderate complexity", "~10"),
        ("core2x", "60s – 10min", "High complexity cross-referenced", "~10"),
        ("pro", "2min – 10min", "Exploratory web research", "~20"),
        ("ultra", "5min – 25min", "Advanced multi-source deep research", "~20"),
        ("ultra2x", "5min – 50min", "Difficult deep research", "~25"),
        ("ultra4x", "5min – 90min", "Very difficult deep research", "~25"),
        ("ultra8x", "5min – 2hr", "Most difficult deep research", "~25"),
    ]
    fast_tiers = [
        ("lite-fast", "10s – 20s", "Lowest latency", "~2"),
        ("base-fast", "15s – 50s", "Fast standard enrichments", "~5"),
        ("core-fast", "15s – 100s", "Fast cross-referenced", "~10"),
        ("core2x-fast", "15s – 3min", "Fast high complexity", "~10"),
        ("pro-fast", "30s – 5min", "Fast exploratory research", "~20"),
        ("ultra-fast", "1min – 10min", "Fast deep research", "~20"),
        ("ultra2x-fast", "1min – 20min", "Fast difficult research", "~25"),
        ("ultra4x-fast", "1min – 40min", "Fast very difficult research", "~25"),
        ("ultra8x-fast", "1min – 1hr", "Fast most difficult research", "~25"),
    ]

    if args.json:
        catalog = [
            {"name": name, "variant": variant, "latency": latency,
             "description": desc, "max_fields": fields}
            for variant, tiers in (("standard", standard_tiers), ("fast", fast_tiers))
            for name, latency, desc, fields in tiers
        ]
        print(json.dumps({"success": True, "processors": catalog}))
        return 0

    def _print_tier_table(heading: str, tiers: list, width: int):
        # Fixed-width columns; fast names are longer, hence the wider column.
        print(heading)
        print(f" {'PROCESSOR':<{width}} {'LATENCY':<16} {'MAX FIELDS':<12} {'STRENGTHS'}")
        print(f" {'─' * 75}")
        for name, latency, desc, fields in tiers:
            print(f" {name:<{width}} {latency:<16} {fields:<12} {desc}")

    _print_tier_table("\n Standard processors (prioritize data freshness):\n", standard_tiers, 12)
    _print_tier_table("\n Fast processors (prioritize speed, 2-5x faster):\n", fast_tiers, 16)

    print(f"""
 Notes:
 · Standard processors prioritize freshness — best for accuracy-critical tasks
 · Fast processors are 2-5x faster — best for interactive/agent workflows
 · 'auto' output schema enables Deep Research for pro and above
 · Max fields are approximate — complex fields use more capacity
 · Pricing: docs.parallel.ai/getting-started/pricing
""")
    return 0
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def cmd_result(args):
    """Fetch and render the output of a completed task run.

    Returns 1 (after reporting) when the run has not completed yet,
    otherwise writes/prints the result and returns 0.
    """
    api_key = _get_api_key()

    # Confirm the run actually finished before asking for its result.
    run = _api_request("GET", f"/v1/tasks/runs/{args.run_id}", api_key)
    status = run.get("status")
    if status != "completed":
        if args.json:
            print(json.dumps({"success": False, "status": status, "error": run.get("error")}))
        else:
            print(f" ✗ task not complete: {status}", file=sys.stderr)
        return 1

    payload = _api_request("GET", f"/v1/tasks/runs/{args.run_id}/result", api_key)
    output = payload.get("output")
    destination = pathlib.Path(args.output) if args.output else None

    if args.json:
        rendered = json.dumps({"success": True, "run_id": args.run_id, "output": output}, indent=2)
        if destination:
            destination.parent.mkdir(parents=True, exist_ok=True)
            destination.write_text(rendered, encoding="utf-8")
        print(rendered)
        return 0

    rendered = _format_markdown("(retrieved)", run.get("processor", ""), args.run_id,
                                output, run.get("created_at", ""),
                                include_basis=not getattr(args, "no_basis", False))
    if destination:
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(rendered, encoding="utf-8")
        if not args.quiet:
            print(f" ✓ {destination}", file=sys.stderr)
    else:
        print(rendered)

    return 0
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
# ── Parser ───────────────────────────────────────────────────────
|
|
508
|
+
|
|
509
|
+
def register(subparsers):
    """Register the `research` subcommand (alias `r`) and all its flags.

    Returns the configured ArgumentParser; dispatching is handled by
    `_dispatch` via `set_defaults(func=...)`.
    """
    F = argparse.RawDescriptionHelpFormatter

    p = subparsers.add_parser(
        "research", aliases=["r"], help="Web research tasks (Parallel Task API)",
        formatter_class=F,
        epilog="""modes:
  escli research "query" -o report.md          Run a research task (default)
  escli research --processors                  List available processor tiers
  escli research --status <run-id>             Check task status
  escli research --result <run-id> -o out.md   Fetch completed result

examples:
  escli research "HVAC industry market report" -o hvac.md
  escli research "Stripe" --output-schema "founding year and total funding" -p base
  escli research "AI startups 2026" -o ai.md -p ultra --after-date 2026-01-01
  escli research "competitive analysis of CRM" -o crm.md --text --include-domains g2.com,gartner.com
  escli research "Stripe" --schema enrichment.json -o stripe.json -p core
  escli research "follow-up on API?" -o followup.md --follow-up trun_xxx

processors (ascending quality):
  lite → base → core → core2x → pro → ultra → ultra2x → ultra4x → ultra8x
  Append -fast for speed (e.g. pro-fast, ultra-fast)

output modes:
  (default)          Auto — processor determines structure (deep research for pro+)
  --text             Markdown report with inline citations
  --schema FILE      Structured JSON output per your JSON Schema file
  --output-schema S  Inline schema description string
""")

    # Query (positional, optional — not needed for --status/--result/--processors)
    p.add_argument("query", nargs="*", help="Research question or topic")

    # Mode flags (mutually exclusive with running a query)
    mode_g = p.add_argument_group("modes")
    mode_g.add_argument("--processors", action="store_true", help="List available processor tiers")
    mode_g.add_argument("--status", default=None, metavar="RUN_ID", help="Check task run status")
    mode_g.add_argument("--result", default=None, metavar="RUN_ID", help="Fetch completed task result")

    # Run options
    p.add_argument("-o", "--output", default=None, help="Output file path (markdown or JSON)")
    p.add_argument("-p", "--processor", default="pro", choices=PROCESSORS, help="Processor tier (default: pro)")

    # Output schema (see cmd_run precedence: --schema > --output-schema > --text)
    schema_g = p.add_argument_group("output schema")
    schema_g.add_argument("--text", action="store_true", help="Markdown report format")
    schema_g.add_argument("--schema", default=None, metavar="FILE", help="JSON Schema file for structured output")
    schema_g.add_argument("--output-schema", default=None, metavar="STR", help="Inline output schema description")

    # Input
    input_g = p.add_argument_group("input")
    input_g.add_argument("--input-json", default=None, metavar="JSON", help="JSON object as input (instead of text)")
    input_g.add_argument("--input-file", default=None, metavar="FILE", help="JSON file as input")

    # Source policy
    source_g = p.add_argument_group("source policy")
    source_g.add_argument("--include-domains", default=None, metavar="D1,D2", help="Only use these domains")
    source_g.add_argument("--exclude-domains", default=None, metavar="D1,D2", help="Exclude these domains")
    source_g.add_argument("--after-date", default=None, metavar="YYYY-MM-DD", help="Only content after this date")

    # Advanced
    adv_g = p.add_argument_group("advanced")
    adv_g.add_argument("--location", default=None, metavar="CC", help="ISO country code for geo-targeted results")
    adv_g.add_argument("--metadata", default=None, help="Metadata as JSON or key=val,key=val")
    adv_g.add_argument("--follow-up", default=None, metavar="RUN_ID", help="Follow-up on a previous task run")
    adv_g.add_argument("--no-basis", action="store_true", help="Exclude citations and reasoning")

    p.set_defaults(func=_dispatch)

    return p
|
|
581
|
+
|
|
582
|
+
|
|
583
|
+
def _dispatch(args):
|
|
584
|
+
"""Route to the right handler based on flags."""
|
|
585
|
+
if getattr(args, "processors", False):
|
|
586
|
+
return cmd_processors(args)
|
|
587
|
+
if getattr(args, "status", None):
|
|
588
|
+
args.run_id = args.status
|
|
589
|
+
return cmd_status(args)
|
|
590
|
+
if getattr(args, "result", None):
|
|
591
|
+
args.run_id = args.result
|
|
592
|
+
return cmd_result(args)
|
|
593
|
+
if not args.query:
|
|
594
|
+
print(" usage: escli research \"query\" -o output.md", file=sys.stderr)
|
|
595
|
+
print(" run 'escli research --processors' to see available tiers", file=sys.stderr)
|
|
596
|
+
return 2
|
|
597
|
+
return cmd_run(args)
|