eightstatecli 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
escli/__main__.py ADDED
@@ -0,0 +1,5 @@
1
"""Allow running the CLI as ``python -m escli``."""
import sys

from escli import main

if __name__ == "__main__":
    # main() may return None on success; coerce that to exit status 0.
    sys.exit(main() or 0)
File without changes
@@ -0,0 +1,438 @@
1
+ """
2
+ escli audio — transcription via AssemblyAI.
3
+
4
+ Usage:
5
+ escli audio transcribe <file-or-url> Upload + transcribe + poll + return
6
+ escli audio status <id> Check transcript status
7
+ escli audio get <id> Fetch completed transcript
8
+ escli audio list List recent transcripts
9
+
10
+ Speaker diarization:
11
+ --speakers Enable speaker labels
12
+ --speakers-expected N Hint: expected number of speakers (1-20)
13
+ --speaker-names A,B,C Identify speakers by name
14
+
15
+ Audio intelligence:
16
+ --sentiment Enable sentiment analysis
17
+ --chapters Enable auto chapters
18
+ --entities Enable entity detection
19
+ --summarize Enable summarization
20
+ --highlights Enable auto highlights
21
+ --topics Enable topic detection (IAB)
22
+ --content-safety Enable content safety detection
23
+
24
+ Transcription options:
25
+ --language CODE Language code (default: auto-detect)
26
+ --dual-channel Enable dual channel transcription
27
+ --multichannel Enable multichannel transcription
28
+ --word-boost W1,W2 Boost accuracy for specific words
29
+ --disfluencies Include filler words (umm, uh)
30
+ --filter-profanity Filter profanity
31
+ --redact-pii Redact personally identifiable info
32
+
33
+ Output:
34
+ --format text|json|srt|vtt Output format (default: text)
35
+ -o, --output FILE Write output to file
36
+ """
37
+
38
+ import argparse
39
+ import json
40
+ import os
41
+ import pathlib
42
+ import sys
43
+ import time
44
+
45
+ from ..services.credentials import get_key_for_service, report_key
46
+
47
+ AAI_BASE = "https://api.assemblyai.com"
48
+ POLL_INTERVAL = 3.0
49
+
50
+
51
def _get_api_key() -> str:
    """Resolve the AssemblyAI API key, or exit(1) with a hint on stderr."""
    api_key = get_key_for_service("assemblyai", "ASSEMBLYAI_API_KEY")
    if api_key:
        return api_key
    print(" ✗ no AssemblyAI API key. Set ASSEMBLYAI_API_KEY or add one via the dashboard.", file=sys.stderr)
    sys.exit(1)
57
+
58
+
59
+ def _headers(api_key: str) -> dict:
60
+ return {"Authorization": api_key, "Content-Type": "application/json"}
61
+
62
+
63
def _request(method: str, path: str, api_key: str, **kwargs):
    """Issue one HTTP request to the AssemblyAI API and raise on 4xx/5xx.

    Recognized kwargs: ``json_body`` (dict payload), ``content`` (raw body,
    e.g. an open file), ``timeout`` (seconds, default 30).
    """
    # Imported lazily so the module loads even without httpx installed.
    import httpx

    request_headers = {"Authorization": api_key}
    # Only JSON payloads carry an explicit content type; raw uploads do not.
    if "json_body" in kwargs:
        request_headers["Content-Type"] = "application/json"

    response = httpx.request(
        method,
        f"{AAI_BASE}{path}",
        headers=request_headers,
        json=kwargs.get("json_body"),
        content=kwargs.get("content"),
        timeout=kwargs.get("timeout", 30),
    )
    response.raise_for_status()
    return response
78
+
79
+
80
def _upload_file(filepath: str, api_key: str, quiet: bool = False) -> str:
    """Upload a local file to AssemblyAI, return the upload_url.

    Exits with status 1 when the path does not exist.
    """
    audio_path = pathlib.Path(filepath)
    if not audio_path.exists():
        print(f" ✗ file not found: {filepath}", file=sys.stderr)
        sys.exit(1)

    size_mb = audio_path.stat().st_size / (1024 * 1024)
    if not quiet:
        print(f" ▸ uploading {audio_path.name} ({size_mb:.1f} MB)...", file=sys.stderr)

    # Stream the file body directly; 300s timeout accommodates large uploads.
    with audio_path.open("rb") as audio_file:
        response = _request("POST", "/v2/upload", api_key, content=audio_file, timeout=300)

    return response.json()["upload_url"]
95
+
96
+
97
+ def _build_transcript_body(args, audio_url: str) -> dict:
98
+ """Build the transcript request body from CLI args."""
99
+ body: dict = {
100
+ "audio_url": audio_url,
101
+ "speech_models": ["universal-3-pro", "universal-2"],
102
+ }
103
+
104
+ # Language
105
+ lang = getattr(args, "language", None)
106
+ if lang:
107
+ body["language_code"] = lang
108
+ else:
109
+ body["language_detection"] = True
110
+
111
+ # Speaker diarization
112
+ if getattr(args, "speakers", False):
113
+ body["speaker_labels"] = True
114
+ if getattr(args, "speakers_expected", None):
115
+ body["speaker_labels"] = True
116
+ body["speakers_expected"] = args.speakers_expected
117
+ if getattr(args, "speaker_names", None):
118
+ body["speaker_labels"] = True
119
+ names = [n.strip() for n in args.speaker_names.split(",")]
120
+ body["speech_understanding"] = {
121
+ "request": {
122
+ "speaker_identification": {
123
+ "speaker_type": "name",
124
+ "known_values": names,
125
+ }
126
+ }
127
+ }
128
+
129
+ # Audio intelligence
130
+ if getattr(args, "sentiment", False):
131
+ body["sentiment_analysis"] = True
132
+ if getattr(args, "chapters", False):
133
+ body["auto_chapters"] = True
134
+ if getattr(args, "entities", False):
135
+ body["entity_detection"] = True
136
+ if getattr(args, "summarize", False):
137
+ body["summarization"] = True
138
+ body["summary_model"] = "informative"
139
+ body["summary_type"] = "bullets"
140
+ if getattr(args, "highlights", False):
141
+ body["auto_highlights"] = True
142
+ if getattr(args, "topics", False):
143
+ body["iab_categories"] = True
144
+ if getattr(args, "content_safety", False):
145
+ body["content_safety"] = True
146
+
147
+ # Transcription options
148
+ if getattr(args, "dual_channel", False):
149
+ body["dual_channel"] = True
150
+ if getattr(args, "multichannel", False):
151
+ body["multichannel"] = True
152
+ if getattr(args, "word_boost", None):
153
+ body["word_boost"] = [w.strip() for w in args.word_boost.split(",")]
154
+ body["boost_param"] = "high"
155
+ if getattr(args, "disfluencies", False):
156
+ body["disfluencies"] = True
157
+ if getattr(args, "filter_profanity", False):
158
+ body["filter_profanity"] = True
159
+ if getattr(args, "redact_pii", False):
160
+ body["redact_pii"] = True
161
+ body["redact_pii_policies"] = [
162
+ "email_address", "phone_number", "person_name",
163
+ "location", "date_of_birth", "credit_card_number",
164
+ ]
165
+
166
+ return body
167
+
168
+
169
def _poll(transcript_id: str, api_key: str, quiet: bool = False) -> dict:
    """Poll until transcript is completed or errored, returning the payload."""
    if not quiet:
        # Placeholder progress bar; overwritten in place via \r when done.
        print(" ░░░░░░░░░░░░░░░░░░░░ transcribing...", file=sys.stderr, end="", flush=True)

    while True:
        data = _request("GET", f"/v2/transcript/{transcript_id}", api_key).json()
        status = data.get("status")

        if status == "completed":
            if not quiet:
                print(f"\r ████████████████████ done ", file=sys.stderr)
            return data
        if status == "error":
            if not quiet:
                print(f"\r ✗ transcription failed: {data.get('error', 'unknown')}", file=sys.stderr)
            return data

        time.sleep(POLL_INTERVAL)
189
+
190
+
191
+ def _format_output(data: dict, fmt: str, args) -> str:
192
+ """Format transcript output."""
193
+ if fmt == "json":
194
+ return json.dumps(data, indent=2)
195
+
196
+ if fmt == "srt":
197
+ resp = _request("GET", f"/v2/transcript/{data['id']}/srt", _get_api_key())
198
+ return resp.text
199
+
200
+ if fmt == "vtt":
201
+ resp = _request("GET", f"/v2/transcript/{data['id']}/vtt", _get_api_key())
202
+ return resp.text
203
+
204
+ # text format
205
+ lines = []
206
+
207
+ # Speaker diarization output
208
+ if data.get("utterances"):
209
+ for u in data["utterances"]:
210
+ speaker = u.get("speaker", "?")
211
+ text = u.get("text", "")
212
+ lines.append(f"Speaker {speaker}: {text}")
213
+ elif data.get("text"):
214
+ lines.append(data["text"])
215
+
216
+ # Chapters
217
+ if data.get("chapters"):
218
+ lines.append("\n--- Chapters ---")
219
+ for ch in data["chapters"]:
220
+ lines.append(f"\n## {ch.get('headline', '')}")
221
+ lines.append(ch.get("summary", ""))
222
+
223
+ # Summary
224
+ if data.get("summary"):
225
+ lines.append(f"\n--- Summary ---\n{data['summary']}")
226
+
227
+ # Sentiment
228
+ if data.get("sentiment_analysis_results"):
229
+ lines.append("\n--- Sentiment ---")
230
+ for s in data["sentiment_analysis_results"][:20]:
231
+ lines.append(f" [{s.get('sentiment', '')}] {s.get('text', '')[:80]}")
232
+
233
+ # Entities
234
+ if data.get("entities"):
235
+ lines.append("\n--- Entities ---")
236
+ for e in data["entities"][:20]:
237
+ lines.append(f" {e.get('entity_type', '')}: {e.get('text', '')}")
238
+
239
+ return "\n".join(lines)
240
+
241
+
242
+ # ── Commands ─────────────────────────────────────────────────────
243
+
244
def cmd_transcribe(args):
    """Upload (if local file), create transcript, poll, return result.

    Returns 0 on success, 1 when transcription fails. Output goes to a
    file (-o), as a JSON envelope (--json), or to stdout.
    """
    api_key = _get_api_key()
    source = args.source
    t0 = time.time()

    # Determine audio URL: remote sources pass through, local files upload first.
    if source.startswith("http://") or source.startswith("https://"):
        audio_url = source
    else:
        audio_url = _upload_file(source, api_key, args.quiet)

    # Build and submit the transcript request.
    body = _build_transcript_body(args, audio_url)
    if not args.quiet:
        print(" ▸ submitting transcription...", file=sys.stderr)

    resp = _request("POST", "/v2/transcript", api_key, json_body=body)
    data = resp.json()
    transcript_id = data["id"]

    if not args.quiet:
        print(f" · id: {transcript_id}", file=sys.stderr)

    # Poll until the transcript reaches a terminal state.
    result = _poll(transcript_id, api_key, args.quiet)
    elapsed = round(time.time() - t0, 1)

    if result.get("status") == "error":
        # _poll already reported the failure on stderr for the non-JSON case.
        if args.json:
            print(json.dumps({"success": False, "error": result.get("error"), "id": transcript_id}))
        return 1

    # Format and output
    fmt = getattr(args, "format", "text") or "text"
    output = _format_output(result, fmt, args)

    if args.json and fmt != "json":
        # BUG FIX: the API returns explicit null (not a missing key) for
        # features that were not enabled, so `.get(key, [])` still yields
        # None — coalesce with `or` before iterating/serializing.
        utterances = result.get("utterances") or []
        print(json.dumps({
            "success": True,
            "id": transcript_id,
            "elapsed_seconds": elapsed,
            "text": result.get("text") or "",
            "speakers": len(set(u.get("speaker", "") for u in utterances)),
            "words": result.get("words") or [],
            "utterances": utterances,
        }))
    elif getattr(args, "output", None):
        outpath = pathlib.Path(args.output)
        outpath.write_text(output)
        if not args.quiet:
            print(f" ✓ {outpath} ({elapsed}s)", file=sys.stderr)
        if args.quiet:
            # Quiet mode still emits the absolute path for scripting.
            print(str(outpath.resolve()))
    else:
        print(output)
        if not args.quiet and fmt == "text":
            print(f"\n ✓ {elapsed}s · {transcript_id}", file=sys.stderr)

    return 0
304
+
305
+
306
def cmd_status(args):
    """Check transcript status; always returns 0."""
    data = _request("GET", f"/v2/transcript/{args.transcript_id}", _get_api_key()).json()

    if args.json:
        payload = {
            "success": True,
            "id": args.transcript_id,
            "status": data.get("status"),
            "error": data.get("error"),
        }
        print(json.dumps(payload))
        return 0

    print(f" {args.transcript_id}: {data.get('status', 'unknown')}")
    if data.get("error"):
        print(f" error: {data['error']}")
    return 0
321
+
322
+
323
def cmd_get(args):
    """Fetch a completed transcript; returns 1 if it is not ready yet."""
    data = _request("GET", f"/v2/transcript/{args.transcript_id}", _get_api_key()).json()

    if data.get("status") != "completed":
        if args.json:
            print(json.dumps({"success": False, "status": data.get("status"), "error": data.get("error")}))
        else:
            print(f" ✗ transcript not ready: {data.get('status')}", file=sys.stderr)
        return 1

    fmt = getattr(args, "format", "text") or "text"
    rendered = _format_output(data, fmt, args)

    destination = getattr(args, "output", None)
    if destination:
        pathlib.Path(destination).write_text(rendered)
        print(f" ✓ {destination}", file=sys.stderr)
        return 0

    print(rendered)
    return 0
345
+
346
+
347
def cmd_list(args):
    """List recent transcripts (newest 20); always returns 0."""
    api_key = _get_api_key()
    listing = _request("GET", "/v2/transcript?limit=20", api_key).json()
    transcripts = listing.get("transcripts", [])

    if args.json:
        print(json.dumps({"success": True, "transcripts": transcripts, "count": len(transcripts)}))
        return 0

    if not transcripts:
        print(" No transcripts found.")
        return 0

    # Simple fixed-width table on stdout.
    print(f"\n {'ID':<40} {'STATUS':<12} {'CREATED'}")
    print(f" {'─' * 70}")
    for item in transcripts:
        print(f" {item.get('id', ''):<40} {item.get('status', ''):<12} {item.get('created', '')}")
    print()
    return 0
368
+
369
+
370
+ # ── Parser ───────────────────────────────────────────────────────
371
+
372
def register(subparsers):
    """Register the ``audio`` subcommand group.

    Wires four subcommands (transcribe/status/get/list) onto the top-level
    parser's *subparsers* and returns the ``audio`` parser object.
    """
    # Raw formatter preserves the hand-written epilog layout verbatim.
    F = argparse.RawDescriptionHelpFormatter

    audio_p = subparsers.add_parser(
        "audio", aliases=["au"], help="Audio transcription (AssemblyAI)",
        formatter_class=F,
        epilog="""subcommands:
  transcribe <file-or-url>   Transcribe audio with speaker diarization
  status <id>                Check transcript status
  get <id>                   Fetch completed transcript
  list                       List recent transcripts

examples:
  escli audio transcribe meeting.mp3 --speakers
  escli audio transcribe https://example.com/audio.mp3 --speakers-expected 3
  escli audio transcribe call.wav --speakers --sentiment --summarize
  escli audio transcribe interview.mp3 --speaker-names "Alice,Bob" -o transcript.txt
  escli --json --quiet audio transcribe file.mp3 --speakers
""")

    audio_subs = audio_p.add_subparsers(dest="audio_command", metavar="subcommand")

    # transcribe — the main workflow; each flag maps onto a field built by
    # _build_transcript_body.
    tr_p = audio_subs.add_parser("transcribe", aliases=["t"], help="Transcribe audio")
    tr_p.add_argument("source", help="Audio file path or URL")
    tr_p.add_argument("-o", "--output", default=None, help="Write output to file")
    tr_p.add_argument("--format", choices=["text", "json", "srt", "vtt"], default="text", help="Output format")
    # Speaker diarization
    tr_p.add_argument("--speakers", action="store_true", help="Enable speaker labels")
    tr_p.add_argument("--speakers-expected", type=int, default=None, metavar="N", help="Expected speaker count (1-20)")
    tr_p.add_argument("--speaker-names", default=None, metavar="A,B,C", help="Identify speakers by name")
    # Audio intelligence
    tr_p.add_argument("--sentiment", action="store_true", help="Enable sentiment analysis")
    tr_p.add_argument("--chapters", action="store_true", help="Enable auto chapters")
    tr_p.add_argument("--entities", action="store_true", help="Enable entity detection")
    tr_p.add_argument("--summarize", action="store_true", help="Enable summarization")
    tr_p.add_argument("--highlights", action="store_true", help="Enable auto highlights")
    tr_p.add_argument("--topics", action="store_true", help="Enable topic detection (IAB)")
    tr_p.add_argument("--content-safety", action="store_true", help="Enable content safety detection")
    # Transcription options
    tr_p.add_argument("--language", default=None, metavar="CODE", help="Language code (default: auto-detect)")
    tr_p.add_argument("--dual-channel", action="store_true", help="Dual channel transcription")
    tr_p.add_argument("--multichannel", action="store_true", help="Multichannel transcription")
    tr_p.add_argument("--word-boost", default=None, metavar="W1,W2", help="Boost accuracy for words")
    tr_p.add_argument("--disfluencies", action="store_true", help="Include filler words")
    tr_p.add_argument("--filter-profanity", action="store_true", help="Filter profanity")
    tr_p.add_argument("--redact-pii", action="store_true", help="Redact PII")
    tr_p.set_defaults(func=cmd_transcribe)

    # status
    st_p = audio_subs.add_parser("status", aliases=["s"], help="Check transcript status")
    st_p.add_argument("transcript_id", help="Transcript ID")
    st_p.set_defaults(func=cmd_status)

    # get
    get_p = audio_subs.add_parser("get", aliases=["g"], help="Fetch completed transcript")
    get_p.add_argument("transcript_id", help="Transcript ID")
    get_p.add_argument("--format", choices=["text", "json", "srt", "vtt"], default="text", help="Output format")
    get_p.add_argument("-o", "--output", default=None, help="Write output to file")
    get_p.set_defaults(func=cmd_get)

    # list
    list_p = audio_subs.add_parser("list", aliases=["ls"], help="List recent transcripts")
    list_p.set_defaults(func=cmd_list)

    return audio_p