@event4u/agent-config 2.12.0 → 2.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/.agent-src/commands/council/analysis.md +142 -0
  2. package/.agent-src/commands/council/debate.md +129 -0
  3. package/.agent-src/commands/council/default.md +8 -0
  4. package/.agent-src/commands/council/design.md +16 -12
  5. package/.agent-src/commands/council/optimize.md +16 -15
  6. package/.agent-src/commands/council/pr.md +12 -12
  7. package/.agent-src/commands/council.md +48 -2
  8. package/.agent-src/commands/memory/learn-low-impact.md +143 -0
  9. package/.agent-src/personas/advisors/contrarian.md +95 -0
  10. package/.agent-src/personas/advisors/executor.md +99 -0
  11. package/.agent-src/personas/advisors/expansionist.md +98 -0
  12. package/.agent-src/personas/advisors/first-principles.md +98 -0
  13. package/.agent-src/personas/advisors/outsider.md +102 -0
  14. package/.agent-src/rules/ask-when-uncertain.md +10 -6
  15. package/.agent-src/rules/copilot-routing.md +19 -0
  16. package/.agent-src/rules/devcontainer-routing.md +20 -0
  17. package/.agent-src/rules/external-reference-deep-dive.md +1 -1
  18. package/.agent-src/rules/fast-path-marker-visibility.md +38 -0
  19. package/.agent-src/rules/laravel-routing.md +20 -0
  20. package/.agent-src/rules/low-impact-corpus-privacy-floor.md +74 -0
  21. package/.agent-src/rules/symfony-routing.md +20 -0
  22. package/.agent-src/skills/ai-council/SKILL.md +388 -10
  23. package/.agent-src/skills/copilot-config/SKILL.md +1 -1
  24. package/.agent-src/skills/devcontainer/SKILL.md +1 -1
  25. package/.agent-src/skills/laravel/SKILL.md +1 -1
  26. package/.agent-src/skills/project-analysis-core/SKILL.md +1 -1
  27. package/.agent-src/skills/project-analyzer/SKILL.md +1 -1
  28. package/.agent-src/skills/symfony-workflow/SKILL.md +1 -1
  29. package/.agent-src/skills/universal-project-analysis/SKILL.md +1 -1
  30. package/.agent-src/templates/agents/agent-project-settings.example.yml +1 -1
  31. package/.claude-plugin/marketplace.json +4 -1
  32. package/AGENTS.md +1 -1
  33. package/CHANGELOG.md +346 -124
  34. package/CONTRIBUTING.md +5 -0
  35. package/README.md +6 -6
  36. package/config/agent-settings.template.yml +5 -93
  37. package/config/gitignore-block.txt +6 -0
  38. package/docs/architecture/multi-tool-projection.md +53 -0
  39. package/docs/architecture/{compression.md → source-projection.md} +21 -3
  40. package/docs/architecture.md +15 -15
  41. package/docs/archive/CHANGELOG-pre-2.11.0.md +141 -0
  42. package/docs/catalog.md +25 -12
  43. package/docs/contracts/adr-architectural-consensus-mechanism.md +68 -0
  44. package/docs/contracts/adr-level-6-productization.md +7 -9
  45. package/docs/contracts/ai-council-config.md +658 -0
  46. package/docs/contracts/command-clusters.md +58 -2
  47. package/docs/contracts/command-surface-tiers.md +3 -2
  48. package/docs/contracts/cost-profile-defaults.md +5 -0
  49. package/docs/contracts/decision-engine-gates.md +5 -0
  50. package/docs/contracts/decision-trace-v1.md +2 -2
  51. package/docs/contracts/file-ownership-matrix.json +1735 -72
  52. package/docs/contracts/installed-tools-lockfile.md +2 -1
  53. package/docs/contracts/low-impact-corpus-format.md +95 -0
  54. package/docs/contracts/mcp-beta-criteria.md +6 -5
  55. package/docs/contracts/mcp-cloud-scope.md +5 -4
  56. package/docs/contracts/multi-tool-projection-fidelity.md +115 -0
  57. package/docs/contracts/release-trunk-sync.md +4 -3
  58. package/docs/contracts/tier-3-contrib-plugin.md +5 -6
  59. package/docs/getting-started.md +2 -2
  60. package/docs/guidelines/agent-infra/installed-tools-manifest.md +2 -1
  61. package/docs/installation.md +32 -0
  62. package/package.json +1 -1
  63. package/scripts/_archive/README.md +59 -0
  64. package/scripts/_cli/cmd_doctor.py +134 -0
  65. package/scripts/ai_council/_default_prices.py +10 -1
  66. package/scripts/ai_council/advisors.py +148 -0
  67. package/scripts/ai_council/airgap.py +165 -0
  68. package/scripts/ai_council/cli_hints.py +123 -0
  69. package/scripts/ai_council/clients.py +959 -5
  70. package/scripts/ai_council/compile_corpus.py +178 -0
  71. package/scripts/ai_council/confidence_gate.py +156 -0
  72. package/scripts/ai_council/config.py +1364 -0
  73. package/scripts/ai_council/consensus.py +329 -0
  74. package/scripts/ai_council/events_log.py +137 -0
  75. package/scripts/ai_council/learn_low_impact_preview.py +252 -0
  76. package/scripts/ai_council/low_impact.py +714 -0
  77. package/scripts/ai_council/low_impact_corpus.py +466 -0
  78. package/scripts/ai_council/low_impact_intake.py +163 -0
  79. package/scripts/ai_council/modes.py +6 -1
  80. package/scripts/ai_council/necessity.py +782 -0
  81. package/scripts/ai_council/orchestrator.py +872 -20
  82. package/scripts/ai_council/probation_gate.py +152 -0
  83. package/scripts/ai_council/prompts.py +335 -0
  84. package/scripts/ai_council/redact_low_impact_entry.py +155 -0
  85. package/scripts/ai_council/replay.py +155 -0
  86. package/scripts/ai_council/session.py +19 -1
  87. package/scripts/ai_council/shadow_dispatch.py +235 -0
  88. package/scripts/ai_council/solo_dispatch.py +226 -0
  89. package/scripts/audit_cloud_compatibility.py +74 -0
  90. package/scripts/audit_command_surface.py +363 -0
  91. package/scripts/check_compressed_paths.py +6 -1
  92. package/scripts/check_council_layout.py +11 -0
  93. package/scripts/ci_time_ratio.py +168 -0
  94. package/scripts/council_cli.py +2005 -30
  95. package/scripts/install.sh +12 -0
  96. package/scripts/measure_projection_bytes.py +159 -0
  97. package/scripts/measure_roadmap_trajectory.py +112 -0
  98. package/scripts/probe_projection_fidelity.py +202 -0
  99. package/scripts/score_skill_selection.py +198 -0
  100. package/scripts/skill_collision_clusters.py +162 -0
  101. /package/scripts/{_backfill_skill_domains.py → _archive/_backfill_skill_domains.py} +0 -0
  102. /package/scripts/{_bootstrap_tier_frontmatter.py → _archive/_bootstrap_tier_frontmatter.py} +0 -0
  103. /package/scripts/{_p43_bodies.py → _archive/_p43_bodies.py} +0 -0
  104. /package/scripts/{_p43_compress.py → _archive/_p43_compress.py} +0 -0
  105. /package/scripts/{_p4_migrate.py → _archive/_p4_migrate.py} +0 -0
  106. /package/scripts/{_phase2_shim_helper.py → _archive/_phase2_shim_helper.py} +0 -0
  107. /package/scripts/{_pilot_council_question.py → _archive/_pilot_council_question.py} +0 -0
@@ -13,21 +13,35 @@ Mirrors the contract from `scripts/skill_trigger_eval.py`:
13
13
  Tests inject mock clients via the `client=` constructor argument and
14
14
  never hit the real API.
15
15
 
16
- Mode contract (Phase 2b):
17
- - `billable=True` clients (AnthropicClient, OpenAIClient) participate
18
- in the cost gate — projected USD spend is checked before each call.
19
- - `billable=False` clients (ManualClient, future PlaywrightClient)
20
- skip the cost gate entirely. Spend = $0 to us; provider-side rate
16
+ Mode contract:
17
+ - `billable=True` clients (AnthropicClient, OpenAIClient, GeminiClient,
18
+ XAIClient, PerplexityClient) participate in the cost gate — projected
19
+ USD spend is checked before each call.
20
+ - `billable=False` clients (ManualClient, vendor-official CliClient
21
+ subclasses — AnthropicCliClient, OpenAICliClient, GeminiCliClient)
22
+ skip the USD cost gate entirely. Spend = $0 to us; provider-side
21
23
  limits are the user's concern.
24
+ - `billable=True` CLI subclasses (XAICliClient, PerplexityCliClient)
25
+ wrap community-maintained CLIs that consume the same API key as
26
+ their `api` counterparts — they participate in the USD cost gate.
27
+ `mode: cli` here is an ergonomic shortcut, not a billing change.
28
+
29
+ CLI subclasses additionally consult the optional
30
+ `cli_call_budget.max_calls_per_day.<provider>` quota with state
31
+ persisted at `~/.event4u/agent-config/cli-calls.json` (daily UTC reset).
22
32
  """
23
33
 
24
34
  from __future__ import annotations
25
35
 
36
+ import json
37
+ import shutil
26
38
  import stat
39
+ import subprocess
27
40
  import sys
28
41
  import time
29
42
  from abc import ABC, abstractmethod
30
43
  from dataclasses import dataclass, field
44
+ from datetime import datetime, timezone
31
45
  from pathlib import Path
32
46
  from typing import TextIO
33
47
 
@@ -52,6 +66,15 @@ def _resolve_key_path(filename: str) -> Path:
52
66
 
53
67
  DEFAULT_ANTHROPIC_MODEL = "claude-sonnet-4-5"
54
68
  DEFAULT_OPENAI_MODEL = "gpt-4o"
69
+ DEFAULT_GEMINI_MODEL = "gemini-2.5-pro"
70
+ DEFAULT_XAI_MODEL = "grok-4"
71
+ DEFAULT_PERPLEXITY_MODEL = "sonar-pro"
72
+
73
+ #: OpenAI-API-compatible endpoints. xAI and Perplexity both expose the
74
+ #: ``/v1/chat/completions`` shape, so their clients reuse the ``openai``
75
+ #: SDK with a custom ``base_url``. Gemini has its own SDK (``google-genai``).
76
+ XAI_BASE_URL = "https://api.x.ai/v1"
77
+ PERPLEXITY_BASE_URL = "https://api.perplexity.ai"
55
78
 
56
79
  #: Per-call output budget when no caller-supplied value reaches `ask()`.
57
80
  #: The CLI resolves the live default from `ai_council.max_output_tokens`
@@ -135,6 +158,8 @@ class ExternalAIClient(ABC):
135
158
  name: str = ""
136
159
  model: str = ""
137
160
  billable: bool = True # API-mode subclasses spend money; manual doesn't.
161
+ transport: str = "api" # "api" | "cli" | "manual" — surfaced in session manifest.
162
+ subscription_label: str = "" # vendor-CLI label (e.g. "claude") for non-billable transports.
138
163
 
139
164
  @abstractmethod
140
165
  def ask(
@@ -269,6 +294,934 @@ class OpenAIClient(ExternalAIClient):
269
294
  )
270
295
 
271
296
 
297
+ # ── Gemini / xAI / Perplexity (Phase 0 — Step 6) ─────────────────────
298
+
299
+
300
class GeminiClient(ExternalAIClient):
    """Google Gemini via the ``google-genai`` SDK.

    The SDK is imported inside the constructor, and only when no client
    is injected, so code paths that never build a real ``GeminiClient``
    do not require ``google-genai`` to be installed. Tests inject a mock
    client shaped like ``genai.Client(api_key=...)`` —
    ``self._client.models.generate_content(...)`` returns an object
    with ``.text`` and ``.usage_metadata.{prompt_token_count,
    candidates_token_count}``.
    """

    name = "gemini"
    billable = True

    def __init__(
        self,
        model: str = DEFAULT_GEMINI_MODEL,
        client: object = None,
        api_key: str | None = None,
    ):
        """Bind the model and either adopt ``client`` or build a real SDK client.

        Raises:
            RuntimeError: when neither ``client`` nor ``api_key`` is given,
                or when the ``google-genai`` package cannot be imported.
        """
        self.model = model
        if client is not None:
            # Injected client (tests): skip key validation and the SDK import.
            self._client = client
            return
        if api_key is None:
            raise RuntimeError(
                "GeminiClient requires explicit api_key or injected client. "
                "Use `api_key_ref: env:GEMINI_API_KEY` in agents/.ai-council.yml."
            )
        try:
            from google import genai  # type: ignore[import-not-found]
        except ImportError as exc:  # pragma: no cover - exercised only with real SDK
            raise RuntimeError(
                "google-genai package not installed. `pip install google-genai`."
            ) from exc
        self._client = genai.Client(api_key=api_key)

    def ask(
        self,
        system_prompt: str,
        user_prompt: str,
        max_tokens: int = DEFAULT_MAX_TOKENS,
    ) -> CouncilResponse:
        """Send one prompt to Gemini and normalise the reply.

        The system and user prompts are joined into a single ``contents``
        string separated by a ``---`` rule. Any exception raised by the
        SDK call is converted into a ``CouncilResponse`` whose ``error``
        is ``"<ExceptionType>: <message>"``; this method does not raise
        for SDK failures.
        """
        t0 = time.monotonic()
        contents = f"{system_prompt}\n\n---\n\n{user_prompt}"
        try:
            response = self._client.models.generate_content(
                model=self.model,
                contents=contents,
                config={"max_output_tokens": max_tokens},
            )
        except Exception as exc:  # noqa: BLE001 - normalise all SDK errors
            return CouncilResponse(
                provider=self.name, model=self.model, text="",
                latency_ms=int((time.monotonic() - t0) * 1000),
                error=f"{type(exc).__name__}: {exc}",
            )
        latency_ms = int((time.monotonic() - t0) * 1000)
        text = getattr(response, "text", "") or ""
        usage = getattr(response, "usage_metadata", None)
        # The usage counters may be present but set to None; `or 0` keeps
        # None out of the token fields. getattr on a missing `usage`
        # (None) falls back to the 0 default as before.
        return CouncilResponse(
            provider=self.name, model=self.model, text=text,
            input_tokens=getattr(usage, "prompt_token_count", 0) or 0,
            output_tokens=getattr(usage, "candidates_token_count", 0) or 0,
            latency_ms=latency_ms,
        )
361
+
362
+
363
class _OpenAICompatibleClient(ExternalAIClient):
    """Shared shape for OpenAI-API-compatible providers (xAI, Perplexity).

    Both vendors implement ``/v1/chat/completions`` and accept the
    ``openai`` Python SDK with a custom ``base_url``. The reasoning-
    model branch from :class:`OpenAIClient` is intentionally omitted —
    neither xAI nor Perplexity ships a reasoning model that requires
    ``max_completion_tokens`` as of 2026-05-14.

    Subclasses set ``name`` and ``base_url``.
    """

    billable = True
    base_url: str = ""

    def __init__(
        self,
        model: str,
        client: object = None,
        api_key: str | None = None,
    ):
        """Bind the model and either adopt ``client`` or build an SDK client.

        Raises:
            RuntimeError: when neither ``client`` nor ``api_key`` is given,
                or when the ``openai`` package cannot be imported.
        """
        self.model = model
        if client is not None:
            # Injected client (tests): skip key validation and the SDK import.
            self._client = client
            return
        if api_key is None:
            raise RuntimeError(
                f"{type(self).__name__} requires explicit api_key or injected client."
            )
        try:
            import openai  # type: ignore[import-not-found]
        except ImportError as exc:  # pragma: no cover - exercised only with real SDK
            raise RuntimeError(
                "openai package not installed. `pip install openai`."
            ) from exc
        self._client = openai.OpenAI(api_key=api_key, base_url=self.base_url)

    def ask(
        self,
        system_prompt: str,
        user_prompt: str,
        max_tokens: int = DEFAULT_MAX_TOKENS,
    ) -> CouncilResponse:
        """Run one chat completion and normalise the reply.

        Any exception raised by the SDK call is converted into a
        ``CouncilResponse`` whose ``error`` is
        ``"<ExceptionType>: <message>"``; this method does not raise for
        SDK failures. A reply without choices, without a message, or
        with empty content yields empty ``text``.
        """
        t0 = time.monotonic()
        try:
            response = self._client.chat.completions.create(
                model=self.model,
                max_tokens=max_tokens,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
            )
        except Exception as exc:  # noqa: BLE001 - normalise all SDK errors
            return CouncilResponse(
                provider=self.name, model=self.model, text="",
                latency_ms=int((time.monotonic() - t0) * 1000),
                error=f"{type(exc).__name__}: {exc}",
            )
        latency_ms = int((time.monotonic() - t0) * 1000)
        text = ""
        choices = getattr(response, "choices", None)
        if choices:
            msg = getattr(choices[0], "message", None)
            text = getattr(msg, "content", "") if msg else ""
        usage = getattr(response, "usage", None)
        # A usage counter that is present but None must not leak into the
        # token fields; `or 0` matches the normalisation the CLI parsers use.
        return CouncilResponse(
            provider=self.name, model=self.model, text=text or "",
            input_tokens=getattr(usage, "prompt_tokens", 0) or 0,
            output_tokens=getattr(usage, "completion_tokens", 0) or 0,
            latency_ms=latency_ms,
        )
428
+
429
+
430
class XAIClient(_OpenAICompatibleClient):
    """Council member for xAI Grok, reached through api.x.ai/v1.

    All request/response handling is inherited from the
    OpenAI-compatible base; this class only pins the provider key, the
    endpoint, and the default model.
    """

    name = "xai"
    base_url = XAI_BASE_URL

    def __init__(
        self,
        model: str = DEFAULT_XAI_MODEL,
        client: object = None,
        api_key: str | None = None,
    ):
        # Same three parameters as the base constructor, forwarded in order.
        super().__init__(model, client, api_key)
443
+
444
+
445
class PerplexityClient(_OpenAICompatibleClient):
    """Council member for Perplexity, reached through api.perplexity.ai.

    All request/response handling is inherited from the
    OpenAI-compatible base; this class only pins the provider key, the
    endpoint, and the default model.
    """

    name = "perplexity"
    base_url = PERPLEXITY_BASE_URL

    def __init__(
        self,
        model: str = DEFAULT_PERPLEXITY_MODEL,
        client: object = None,
        api_key: str | None = None,
    ):
        # Same three parameters as the base constructor, forwarded in order.
        super().__init__(model, client, api_key)
458
+
459
+
460
+ # ── CLI transport (step-1 Phase 1+) ──────────────────────────────────
461
+
462
+
463
+ CLI_CALLS_FILENAME = "cli-calls.json"
464
+
465
+ #: Default subprocess timeout for a single CLI call. Long enough for the
466
+ #: largest frontier models to think; short enough to surface a hung
467
+ #: subprocess without freezing the council run.
468
+ DEFAULT_CLI_TIMEOUT_SECONDS = 120.0
469
+
470
+
471
class CliClientError(RuntimeError):
    """Construction-time failure of a CLI council member.

    Signals that the member could not be built at all — for example the
    provider binary is not on ``PATH`` or the subclass declares no
    ``default_binary``.
    """
473
+
474
+
475
def _cli_calls_state_path() -> Path:
    """Resolve the file the daily CLI call counter is written to.

    The location is whatever ``user_global_paths.write_target`` returns
    for ``CLI_CALLS_FILENAME``.
    """
    state_file = user_global_paths.write_target(CLI_CALLS_FILENAME)
    return state_file
478
+
479
+
480
def _today_utc_iso() -> str:
    """Return the current UTC calendar date as an ISO string (``YYYY-MM-DD``)."""
    return datetime.now(timezone.utc).date().isoformat()


def _write_cli_call_counts(path: Path, counts: dict[str, int]) -> None:
    """Persist ``counts`` under today's UTC date, replacing ``path`` atomically.

    The payload is written to a per-process temp file beside ``path`` and
    then moved over it, so a reader never observes a truncated or
    half-written state file.

    Raises:
        OSError: when the directory, temp file, or final replace fails.
    """
    import os  # local import: only needed for the per-process temp-file name

    payload = json.dumps({"date": _today_utc_iso(), "counts": counts}, indent=2)
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = path.with_name(f"{path.name}.{os.getpid()}.tmp")
    try:
        tmp_path.write_text(payload, encoding="utf-8")
        tmp_path.replace(path)
    except OSError:
        # Best-effort cleanup of the temp file; the original error is the
        # one the caller needs to see, so a failed unlink is ignored.
        try:
            tmp_path.unlink()
        except OSError:
            pass
        raise


def load_cli_call_counts(path: Path | None = None) -> dict[str, int]:
    """Return today's per-provider call counts. Empty dict on UTC rollover.

    Args:
        path: state file to read; defaults to ``_cli_calls_state_path()``.

    Returns:
        Mapping of provider name to call count. An empty dict is returned
        when the file is missing, unreadable, not valid JSON, not a JSON
        object, stamped with a date other than today's UTC date, or has a
        non-object ``counts`` field. Individual entries whose value is
        neither an int nor an integer-like string are skipped.
    """
    p = path if path is not None else _cli_calls_state_path()
    if not p.exists():
        return {}
    try:
        data = json.loads(p.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, OSError):
        return {}
    if not isinstance(data, dict) or data.get("date") != _today_utc_iso():
        return {}
    raw_counts = data.get("counts", {})
    if not isinstance(raw_counts, dict):
        return {}
    counts: dict[str, int] = {}
    for key, value in raw_counts.items():
        if not isinstance(value, (int, str)):
            continue
        try:
            counts[str(key)] = int(value)
        except ValueError:
            # A non-numeric string (hand-edited or corrupt entry) is dropped
            # instead of crashing every caller that reads the quota state.
            continue
    return counts


def record_cli_call(provider: str, path: Path | None = None) -> int:
    """Increment today's call count for `provider`. Returns new total.

    Raises:
        OSError: when the state file cannot be written.
    """
    p = path if path is not None else _cli_calls_state_path()
    counts = load_cli_call_counts(p)
    counts[provider] = counts.get(provider, 0) + 1
    _write_cli_call_counts(p, counts)
    return counts[provider]


def reset_cli_call_counts(
    provider: str | None = None,
    path: Path | None = None,
) -> dict[str, int]:
    """Reset the per-provider call counter (step-8 P1, `council quota --reset`).

    ``provider=None`` clears all providers (today's record). Otherwise
    only the named provider's count is removed; other providers are
    preserved. The file is always rewritten with today's UTC date.
    Returns the post-reset counts.

    Raises:
        OSError: when the state file cannot be written.
    """
    p = path if path is not None else _cli_calls_state_path()
    counts = load_cli_call_counts(p)
    if provider is None:
        counts = {}
    else:
        counts.pop(provider, None)
    _write_cli_call_counts(p, counts)
    return counts
536
+
537
+
538
def quota_summary_line(
    clients: "list[CliClient]",
    *,
    cli_calls_path: Path | None = None,
) -> tuple[str, list[str]]:
    """Build the pre-run quota summary line (step-8 P1, D1 + D4).

    Returns ``(summary, warn_providers)`` where ``summary`` is the
    formatted one-liner (empty string when no CLI member has a
    configured cap) and ``warn_providers`` is the subset whose
    ``used / max_calls_per_day`` ratio crossed ``warn_at``. Uncapped
    providers (``max_calls_per_day is None``) are omitted from the
    summary entirely — they cannot exceed a threshold that does not
    exist.

    A cap of ``0`` (or below) is a real cap: ``CliClient.ask`` blocks
    every call for such a member, so it is listed and always flagged in
    ``warn_providers`` rather than silently dropped.

    Tested in ``tests/test_cli_quota_warn.py``.
    """
    # `is not None`, not truthiness: a cap of 0 means "blocked", not "uncapped".
    capped = [c for c in clients if getattr(c, "max_calls_per_day", None) is not None]
    if not capped:
        return "", []
    # Read state once for the whole summary — call counts only mutate
    # inside ``CliClient.ask`` (sequential per-member dispatch), so the
    # pre-run snapshot is always consistent with what's about to fire.
    counts = load_cli_call_counts(cli_calls_path)
    parts: list[str] = []
    warn: list[str] = []
    for c in capped:
        name = getattr(c, "name", "?")
        used = int(counts.get(name, 0))
        limit = int(c.max_calls_per_day)
        parts.append(f"{name} {used}/{limit}")
        if limit <= 0:
            # No ratio can be computed, and the member is already
            # exhausted by definition (used >= limit always holds).
            warn.append(name)
            continue
        warn_at = float(getattr(c, "warn_at", 0.8))
        if used / limit >= warn_at:
            warn.append(name)
    prefix = "⚠️ " if warn else ""
    return f"{prefix}council:quota · " + " · ".join(parts), warn
575
+
576
+
577
class CliClient(ExternalAIClient):
    """Shell-out council member — subscription-authed transport.

    Spawns a locally-installed provider CLI via ``subprocess.run``. Auth
    is delegated to the binary itself (Claude CLI, Codex CLI, Gemini
    CLI, etc. use the user's logged-in subscription session). Spend is
    $0 from this loader's perspective — ``billable=False`` keeps the
    USD cost gate from firing.

    Provider subscription quotas (Claude Pro 5h windows, ChatGPT Plus
    message caps, Gemini free-tier limits) are guarded by the optional
    ``cli_call_budget.max_calls_per_day.<provider>`` config. Counter
    state lives at ``~/.event4u/agent-config/cli-calls.json`` and
    resets on UTC date rollover.

    Subclass contract:

    - ``name``: provider key (`anthropic`, `openai`, `gemini`, …).
    - ``default_binary``: executable name resolved via ``shutil.which``
      when the member-level ``binary:`` field is not set.
    - ``_build_command(system_prompt, user_prompt, max_tokens)``:
      return the argv list to execute.
    - ``_parse_output(stdout, stderr)``: return a partial
      ``CouncilResponse`` (``provider``, ``model``, ``text``,
      ``input_tokens``, ``output_tokens``, ``metadata``). The base
      ``ask()`` fills in ``latency_ms``.

    Construction validates the binary up front — a missing CLI fails
    fast with ``CliClientError`` so the loader can surface a structured
    "skip member with reason" entry rather than crashing the run. An
    explicitly passed ``binary`` is stored as given and not checked.

    Error codes reported by ``ask()``:

    - ``auth_expired`` — non-zero exit whose stderr mentions
      authentication / login / unauthorized.
    - ``timeout`` — the subprocess exceeded ``timeout_seconds``, or a
      non-zero exit whose stderr mentions a timeout / deadline.
    - ``cli_quota_exhausted`` — rate-limit / quota messaging from the
      provider, OR the local counter has hit ``max_calls_per_day``.
    - ``parse_failed: <ExceptionType>`` — the CLI exited with code 0
      but ``_parse_output`` raised on its stdout.
    - ``binary_missing`` — the binary disappeared between construction
      and the call.
    - ``os_error: <ExceptionType>`` — any other ``OSError`` while
      spawning.
    - ``exit_<N>`` — fallback for any non-zero exit code without a known
      stderr pattern.
    """

    billable = False
    transport = "cli"
    default_binary: str = ""

    # Lower-case substrings searched for in lower-cased stderr.
    _AUTH_FAILURE_PATTERNS = (
        "authentication", "unauthorized", "auth failed", "auth_error",
        "login", "not logged in", "session expired", "invalid credentials",
    )
    _TIMEOUT_PATTERNS = ("timeout", "timed out", "deadline exceeded")
    _QUOTA_PATTERNS = (
        "rate limit", "rate_limit", "rate-limit", "quota exceeded",
        "too many requests", "429", "usage limit",
    )

    def __init__(
        self,
        *,
        model: str,
        binary: str | None = None,
        timeout_seconds: float = DEFAULT_CLI_TIMEOUT_SECONDS,
        max_calls_per_day: int | None = None,
        warn_at: float = 0.8,
        cli_calls_path: Path | None = None,
    ):
        """Store the call settings and resolve the provider binary.

        Args:
            model: model identifier handed to the CLI by subclasses.
            binary: explicit executable; when ``None`` the subclass's
                ``default_binary`` is looked up on ``PATH``.
            timeout_seconds: per-call subprocess timeout.
            max_calls_per_day: local daily cap; ``None`` disables the gate.
            warn_at: ratio of the cap at which the quota summary warns.
            cli_calls_path: override for the counter state file.

        Raises:
            CliClientError: when no ``binary`` is given and the subclass
                has no ``default_binary`` or it is not found on ``PATH``.
        """
        self.model = model
        self.timeout_seconds = timeout_seconds
        self.max_calls_per_day = max_calls_per_day
        self.warn_at = warn_at
        self._cli_calls_path = cli_calls_path
        if binary is not None:
            self.binary = binary
        else:
            if not self.default_binary:
                raise CliClientError(
                    f"{type(self).__name__}: no `default_binary` set on subclass; "
                    f"either fix the class or pass `binary=` explicitly."
                )
            resolved = shutil.which(self.default_binary)
            if resolved is None:
                raise CliClientError(
                    f"{type(self).__name__}: binary {self.default_binary!r} "
                    f"not found on PATH. Install the provider CLI or set "
                    f"`members.{self.name}.binary:` in agents/.ai-council.yml."
                )
            self.binary = resolved

    # ── subclass hooks ────────────────────────────────────────────

    @abstractmethod
    def _build_command(
        self, system_prompt: str, user_prompt: str, max_tokens: int
    ) -> list[str]:
        """Return the argv list the subprocess should execute.

        Subclasses return ``[self.binary, ...flags...]`` and pass the
        prompt either via argv (small) or via stdin (large) — see
        ``_stdin_payload``. ``self.binary`` is an absolute path when it
        was resolved from ``default_binary``; an explicitly configured
        ``binary`` is used exactly as given.
        """

    @abstractmethod
    def _parse_output(
        self, stdout: str, stderr: str
    ) -> CouncilResponse:
        """Parse provider-specific stdout into a CouncilResponse.

        ``latency_ms`` is overwritten by the base ``ask()`` wrapper;
        subclasses populate ``provider``, ``model``, ``text``,
        ``input_tokens``, ``output_tokens``, and any ``metadata``. Any
        exception raised here is reported by ``ask()`` as
        ``parse_failed``.
        """

    def _stdin_payload(self, system_prompt: str, user_prompt: str) -> str | None:
        """Return text to send on stdin, or ``None`` to inherit caller's stdin.

        Default: ``None`` — subclasses that prefer stdin-piped prompts
        override (typical for long prompts that would blow argv limits).
        """
        return None

    # ── internal helpers ──────────────────────────────────────────

    @staticmethod
    def _elapsed_ms(started: float) -> int:
        """Return whole milliseconds elapsed since the ``time.monotonic()`` stamp."""
        return int((time.monotonic() - started) * 1000)

    def _error_response(
        self,
        error: str,
        latency_ms: int,
        *,
        text: str = "",
        **metadata: object,
    ) -> CouncilResponse:
        """Build a failed ``CouncilResponse`` tagged ``cli: True`` plus ``metadata``."""
        return CouncilResponse(
            provider=self.name, model=self.model, text=text,
            latency_ms=latency_ms,
            error=error,
            metadata={"cli": True, **metadata},
        )

    def _calls_used_today(self) -> int:
        """Return today's recorded call count for this provider.

        An unreadable or corrupt state file counts as zero, matching how
        ``load_cli_call_counts`` treats malformed JSON, so a damaged
        counter can never raise out of ``ask()``.
        """
        try:
            counts = load_cli_call_counts(self._cli_calls_path)
        except (OSError, ValueError):
            return 0
        return counts.get(self.name, 0)

    def _record_call(self) -> None:
        """Count one spawned call against the daily quota (best effort)."""
        try:
            record_cli_call(self.name, self._cli_calls_path)
        except (OSError, ValueError):  # state-file failure is non-fatal here.
            pass

    def _log_quota_block(self, user_prompt: str, used: int) -> None:
        """Append a ``block_quota`` entry to the persistent events log.

        step-8 D3. The import is lazy to keep clients.py independent of
        the CLI layer at module load time, and every failure is swallowed
        on purpose: logging must never crash ``ask()``.
        """
        try:
            from scripts.ai_council.events_log import append_event
            append_event({
                "lens": "",
                "invocation": "",
                "action": "block_quota",
                "verdict": "",
                "provider_caps": {
                    self.name: {
                        "mode": "cli", "model": self.model,
                    },
                },
                "original_ask": user_prompt,
                "cli_calls_used": used,
                "cli_calls_max": self.max_calls_per_day,
            })
        except Exception:  # pragma: no cover — never crash ask()
            pass

    # ── ask() ──────────────────────────────────────────────────────

    def ask(
        self,
        system_prompt: str,
        user_prompt: str,
        max_tokens: int = DEFAULT_MAX_TOKENS,
    ) -> CouncilResponse:
        """Run one CLI call and normalise every outcome into a CouncilResponse.

        Failures are reported through ``CouncilResponse.error`` (codes
        listed on the class) instead of being raised. Exceptions from
        ``_build_command`` / ``_stdin_payload`` are not intercepted.
        """
        t0 = time.monotonic()

        # 1. quota gate — local counter check before spawning anything.
        if self.max_calls_per_day is not None:
            used = self._calls_used_today()
            if used >= self.max_calls_per_day:
                self._log_quota_block(user_prompt, used)
                return self._error_response(
                    "cli_quota_exhausted",
                    self._elapsed_ms(t0),
                    cli_calls_used=used,
                    cli_calls_max=self.max_calls_per_day,
                )

        # 2. build command + spawn.
        cmd = self._build_command(system_prompt, user_prompt, max_tokens)
        stdin_payload = self._stdin_payload(system_prompt, user_prompt)
        try:
            proc = subprocess.run(
                cmd,
                input=stdin_payload,
                capture_output=True,
                text=True,
                timeout=self.timeout_seconds,
                check=False,
            )
        except subprocess.TimeoutExpired:
            # The process was spawned and ran for the whole timeout, so it
            # counts against the quota like any other failed call;
            # otherwise a hung CLI could be retried without limit.
            self._record_call()
            return self._error_response(
                "timeout",
                self._elapsed_ms(t0),
                timeout_seconds=self.timeout_seconds,
            )
        except FileNotFoundError:
            # Nothing was spawned, so nothing is counted.
            return self._error_response(
                "binary_missing", self._elapsed_ms(t0), binary=self.binary,
            )
        except OSError as exc:
            return self._error_response(
                f"os_error: {type(exc).__name__}", self._elapsed_ms(t0),
            )

        # 3. record the call — even failures count against the quota so
        #    a broken CLI cannot burn the whole budget in a tight loop.
        self._record_call()

        latency_ms = self._elapsed_ms(t0)
        stderr = proc.stderr or ""

        # 4. non-zero exit → classify and bail.
        if proc.returncode != 0:
            return self._error_response(
                self._classify_stderr(stderr, proc.returncode),
                latency_ms,
                returncode=proc.returncode,
                stderr_tail=stderr[-500:],
            )

        # 5. parse stdout via the subclass hook.
        try:
            response = self._parse_output(proc.stdout or "", stderr)
        except Exception as exc:  # noqa: BLE001 — defensive: parse must never crash the run.
            # Raw stdout is kept as text so the output is not lost.
            return self._error_response(
                f"parse_failed: {type(exc).__name__}",
                latency_ms,
                text=proc.stdout or "",
                stderr_tail=stderr[-500:],
            )
        response.latency_ms = latency_ms
        meta = dict(response.metadata)
        meta.setdefault("cli", True)
        response.metadata = meta
        return response

    @classmethod
    def _classify_stderr(cls, stderr: str, returncode: int) -> str:
        """Map stderr of a failed call to a short error code.

        Patterns are matched case-insensitively as substrings, checked in
        the order auth → timeout → quota; ``exit_<returncode>`` is the
        fallback when none match.
        """
        haystack = stderr.lower()
        if any(p in haystack for p in cls._AUTH_FAILURE_PATTERNS):
            return "auth_expired"
        if any(p in haystack for p in cls._TIMEOUT_PATTERNS):
            return "timeout"
        if any(p in haystack for p in cls._QUOTA_PATTERNS):
            return "cli_quota_exhausted"
        return f"exit_{returncode}"
828
+
829
+
830
class AnthropicCliClient(CliClient):
    """Claude via the official `claude` CLI (subscription-authed).

    Invokes ``claude --print --output-format json`` and consumes the
    structured envelope: ``{"result": str, "usage": {"input_tokens":
    int, "output_tokens": int}, "session_id": str, ...}``. The user
    prompt is piped on stdin so it never collides with argv length
    limits; the system prompt rides on argv via
    ``--append-system-prompt``.

    Auth is delegated to the CLI's own session — the user runs
    ``claude /login`` once and the orchestrator inherits the
    subscription. No API key flows through this process.
    """

    name = "anthropic"
    default_binary = "claude"
    subscription_label = "claude-pro"

    def __init__(
        self,
        *,
        # Shares the module-level default with the API client instead of
        # repeating the model literal.
        model: str = DEFAULT_ANTHROPIC_MODEL,
        binary: str | None = None,
        timeout_seconds: float = DEFAULT_CLI_TIMEOUT_SECONDS,
        max_calls_per_day: int | None = None,
        warn_at: float = 0.8,
        cli_calls_path: Path | None = None,
    ):
        """Forward every setting to ``CliClient``; only the model default differs."""
        super().__init__(
            model=model,
            binary=binary,
            timeout_seconds=timeout_seconds,
            max_calls_per_day=max_calls_per_day,
            warn_at=warn_at,
            cli_calls_path=cli_calls_path,
        )

    def _build_command(
        self, system_prompt: str, user_prompt: str, max_tokens: int  # noqa: ARG002
    ) -> list[str]:
        """Return the ``claude`` argv; ``user_prompt`` and ``max_tokens`` are unused here."""
        return [
            self.binary,
            "--print",
            "--output-format", "json",
            "--model", self.model,
            "--append-system-prompt", system_prompt,
        ]

    def _stdin_payload(self, system_prompt: str, user_prompt: str) -> str | None:  # noqa: ARG002
        """Pipe the user prompt on stdin."""
        return user_prompt

    def _parse_output(self, stdout: str, stderr: str) -> CouncilResponse:  # noqa: ARG002
        """Parse the JSON envelope printed by ``claude --output-format json``.

        Raises:
            json.JSONDecodeError: when stdout is not valid JSON.
            ValueError: when the top-level JSON value is not an object.

        Either exception is turned into a ``parse_failed`` response by
        the base ``ask()``.
        """
        envelope = json.loads(stdout)
        if not isinstance(envelope, dict):
            raise ValueError("expected JSON object at the top level of claude CLI output")
        # A JSON null result must become empty text, not the string "None".
        raw_result = envelope.get("result")
        text = "" if raw_result is None else str(raw_result).strip()
        usage = envelope.get("usage") or {}
        if not isinstance(usage, dict):
            usage = {}
        # Optional envelope fields are copied into metadata only when present.
        meta: dict[str, object] = {}
        session_id = envelope.get("session_id")
        if session_id:
            meta["session_id"] = str(session_id)
        total_cost = envelope.get("total_cost_usd")
        if total_cost is not None:
            meta["reported_cost_usd"] = total_cost
        duration_ms = envelope.get("duration_ms")
        if duration_ms is not None:
            meta["reported_duration_ms"] = duration_ms
        return CouncilResponse(
            provider=self.name, model=self.model, text=text,
            input_tokens=int(usage.get("input_tokens", 0) or 0),
            output_tokens=int(usage.get("output_tokens", 0) or 0),
            metadata=meta,
        )
904
+
905
+
906
class OpenAICliClient(CliClient):
    """OpenAI via the official `codex` CLI (subscription-authed).

    Runs ``codex exec --json <prompt>`` and reads the newline-delimited
    JSON event stream it prints. The user prompt is passed on argv
    (Codex does not read prompts from stdin in ``exec`` mode); a
    non-empty system prompt is passed via ``--system``.

    Auth is delegated to the CLI's own session — the user runs
    ``codex login`` once and the orchestrator inherits the
    subscription. No API key flows through this process.

    Output shape: one JSON object per line. An event with
    ``type == "item.completed"`` carries the assistant message in
    ``item.content[*].text``; a ``type == "turn.completed"`` event
    carries token usage in ``usage.input_tokens`` /
    ``usage.output_tokens``. Events may arrive in any order, and
    unparseable lines or unknown event types are silently skipped.
    """

    name = "openai"
    default_binary = "codex"
    subscription_label = "chatgpt-plus"

    _AUTH_FAILURE_PATTERNS = (
        *CliClient._AUTH_FAILURE_PATTERNS,
        "codex login",
        "auth_required",
        "401",
    )

    def __init__(
        self,
        *,
        model: str = "gpt-5",
        binary: str | None = None,
        timeout_seconds: float = DEFAULT_CLI_TIMEOUT_SECONDS,
        max_calls_per_day: int | None = None,
        warn_at: float = 0.8,
        cli_calls_path: Path | None = None,
    ):
        """Forward every keyword argument unchanged to the parent constructor."""
        super().__init__(
            model=model,
            binary=binary,
            timeout_seconds=timeout_seconds,
            max_calls_per_day=max_calls_per_day,
            warn_at=warn_at,
            cli_calls_path=cli_calls_path,
        )

    def _build_command(
        self, system_prompt: str, user_prompt: str, max_tokens: int  # noqa: ARG002
    ) -> list[str]:
        """Build the ``codex exec`` argv; the user prompt is always the last argument."""
        system_args = ["--system", system_prompt] if system_prompt else []
        return [
            self.binary, "exec", "--json", "--model", self.model,
            *system_args,
            user_prompt,
        ]

    def _parse_output(self, stdout: str, stderr: str) -> CouncilResponse:  # noqa: ARG002
        """Fold the JSONL event stream into a single ``CouncilResponse``.

        When several events of the same type appear, the later one
        overwrites what the earlier one set. ``stderr`` is not inspected.
        """
        answer = ""
        prompt_tokens = 0
        completion_tokens = 0
        metadata: dict[str, object] = {}

        for raw_line in stdout.splitlines():
            candidate = raw_line.strip()
            if not candidate:
                continue
            try:
                event = json.loads(candidate)
            except json.JSONDecodeError:
                # Non-JSON noise interleaved with the event stream is ignored.
                continue
            if not isinstance(event, dict):
                continue

            kind = event.get("type")
            if kind == "item.completed":
                item = event.get("item") or {}
                if not isinstance(item, dict):
                    continue
                content = item.get("content") or []
                if isinstance(content, list):
                    pieces = [
                        str(part["text"])
                        for part in content
                        if isinstance(part, dict) and part.get("text")
                    ]
                    # An item without any text leaves a previously captured
                    # answer untouched.
                    if pieces:
                        answer = "\n".join(pieces).strip()
                if item.get("id"):
                    metadata["item_id"] = str(item["id"])
            elif kind == "turn.completed":
                usage = event.get("usage") or {}
                if isinstance(usage, dict):
                    prompt_tokens = int(usage.get("input_tokens", 0) or 0)
                    completion_tokens = int(usage.get("output_tokens", 0) or 0)
            elif kind == "session.created":
                if event.get("session_id"):
                    metadata["session_id"] = str(event["session_id"])

        return CouncilResponse(
            provider=self.name, model=self.model, text=answer,
            input_tokens=prompt_tokens, output_tokens=completion_tokens,
            metadata=metadata,
        )
1004
+
1005
+
1006
class GeminiCliClient(CliClient):
    """Google Gemini via the official `gemini` CLI (free-tier subscription).

    Invokes ``gemini --output-format json --model <model>`` (adding
    ``--system <system prompt>`` when one is given) with the user
    prompt piped on stdin to dodge argv limits, and consumes the
    structured envelope: ``{"response": str, "stats": {"models":
    {"<model>": {"tokens": {"prompt": int, "candidates": int}}}}, ...}``.

    Auth is delegated to the CLI's own session — the user runs
    ``gemini`` once interactively to set up OAuth, then the
    orchestrator inherits the consent. Free-tier quotas apply at the
    Google account level; ``cli_call_budget`` enforces a local mirror.
    """

    name = "gemini"
    default_binary = "gemini"
    subscription_label = "gemini-pro"

    _AUTH_FAILURE_PATTERNS = CliClient._AUTH_FAILURE_PATTERNS + (
        "interactive consent could not be obtained",
        "please run `gemini`",
        "oauth",
    )

    def __init__(
        self,
        *,
        model: str = "gemini-2.5-pro",
        binary: str | None = None,
        timeout_seconds: float = DEFAULT_CLI_TIMEOUT_SECONDS,
        max_calls_per_day: int | None = None,
        warn_at: float = 0.8,
        cli_calls_path: Path | None = None,
    ):
        """Forward every keyword argument unchanged to the parent constructor."""
        super().__init__(
            model=model,
            binary=binary,
            timeout_seconds=timeout_seconds,
            max_calls_per_day=max_calls_per_day,
            warn_at=warn_at,
            cli_calls_path=cli_calls_path,
        )

    def _build_command(
        self, system_prompt: str, user_prompt: str, max_tokens: int  # noqa: ARG002
    ) -> list[str]:
        """Build the ``gemini`` argv.

        The user prompt is deliberately absent: it is delivered on stdin
        by ``_stdin_payload``. ``--system`` is added only for a non-empty
        system prompt.
        """
        cmd = [
            self.binary,
            "--output-format", "json",
            "--model", self.model,
        ]
        if system_prompt:
            cmd.extend(["--system", system_prompt])
        return cmd

    def _stdin_payload(self, system_prompt: str, user_prompt: str) -> str | None:  # noqa: ARG002
        """Return the user prompt, verbatim, as the stdin payload."""
        return user_prompt

    def _token_usage(self, stats: object) -> tuple[int, int]:
        """Extract ``(input_tokens, output_tokens)`` from the envelope's ``stats``.

        Uses the entry for the configured model when the envelope has
        one; otherwise sums the counts of every model entry. Any missing
        or malformed level yields zero counts.
        """
        if not isinstance(stats, dict):
            return 0, 0
        models = stats.get("models")
        if not isinstance(models, dict):
            return 0, 0

        configured = models.get(self.model)
        if isinstance(configured, dict):
            entries = [configured]
        else:
            # Configured model not reported under its own key: account
            # for every model the envelope reports.
            entries = [value for value in models.values() if isinstance(value, dict)]

        input_tokens = 0
        output_tokens = 0
        for entry in entries:
            tokens = entry.get("tokens")
            if isinstance(tokens, dict):
                input_tokens += int(tokens.get("prompt", 0) or 0)
                output_tokens += int(tokens.get("candidates", 0) or 0)
        return input_tokens, output_tokens

    def _parse_output(self, stdout: str, stderr: str) -> CouncilResponse:  # noqa: ARG002
        """Convert the gemini CLI's JSON envelope into a ``CouncilResponse``.

        ``stderr`` is not inspected.

        Raises:
            json.JSONDecodeError: if ``stdout`` is not valid JSON.
            ValueError: if the top-level JSON value is not an object.
        """
        envelope = json.loads(stdout)
        if not isinstance(envelope, dict):
            raise ValueError("expected JSON object at the top level of gemini CLI output")

        # A JSON ``null`` response must yield empty text; ``str(None)``
        # would otherwise leak the literal string "None".
        response = envelope.get("response")
        text = "" if response is None else str(response).strip()

        input_tokens, output_tokens = self._token_usage(envelope.get("stats"))

        meta: dict[str, object] = {}
        # Accept both camelCase and snake_case spellings of the session key.
        session_id = envelope.get("sessionId") or envelope.get("session_id")
        if session_id:
            meta["session_id"] = str(session_id)
        return CouncilResponse(
            provider=self.name, model=self.model, text=text,
            input_tokens=input_tokens, output_tokens=output_tokens,
            metadata=meta,
        )
1096
+
1097
+
1098
class XAICliClient(CliClient):
    """xAI Grok via the community `grok` CLI (Superagent project).

    This is a community-maintained wrapper around the xAI API and is
    **not** an official subscription transport. The CLI reads
    ``XAI_API_KEY`` from its own environment, so each call is paid
    per-token exactly as ``XAIClient`` (api transport) would be.
    ``mode: cli`` is an ergonomic shortcut for users who already drive
    Grok from the shell; it does NOT bypass the USD cost gate.

    Invokes ``grok -p <prompt>``. Output is plain text with no JSON
    envelope, so ``_parse_output`` returns the trimmed stdout and
    estimates the output token count heuristically (chars / 4) for the
    audit trail. Estimates feed the post-call spend tracker, not the
    pre-call gate (the orchestrator's ``estimate()`` already projects
    cost from the prompt before this client is invoked).
    """

    name = "xai"
    default_binary = "grok"
    billable = True  # community CLI consumes an API key — billable applies

    _AUTH_FAILURE_PATTERNS = (
        *CliClient._AUTH_FAILURE_PATTERNS,
        "xai_api_key",
        "401",
        "unauthorized",
    )

    def __init__(
        self,
        *,
        model: str = DEFAULT_XAI_MODEL,
        binary: str | None = None,
        timeout_seconds: float = DEFAULT_CLI_TIMEOUT_SECONDS,
        max_calls_per_day: int | None = None,
        warn_at: float = 0.8,
        cli_calls_path: Path | None = None,
    ):
        """Forward every keyword argument unchanged to the parent constructor."""
        forwarded = {
            "model": model,
            "binary": binary,
            "timeout_seconds": timeout_seconds,
            "max_calls_per_day": max_calls_per_day,
            "warn_at": warn_at,
            "cli_calls_path": cli_calls_path,
        }
        super().__init__(**forwarded)

    def _build_command(
        self, system_prompt: str, user_prompt: str, max_tokens: int  # noqa: ARG002
    ) -> list[str]:
        """Build the ``grok -p <prompt>`` argv, adding ``--model`` when a model is set."""
        # NOTE(review): ``system_prompt`` is not forwarded to the CLI here —
        # confirm this is intended.
        model_args = ["--model", self.model] if self.model else []
        return [self.binary, "-p", user_prompt, *model_args]

    def _parse_output(self, stdout: str, stderr: str) -> CouncilResponse:  # noqa: ARG002
        """Wrap the CLI's plain-text stdout in a ``CouncilResponse``."""
        answer = stdout.strip()
        # Plain-text CLIs surface no token usage — estimate from text
        # length so the audit trail and post-call tracker stay populated.
        # chars / 4 mirrors `pricing.estimate_input_tokens`.
        if answer:
            estimated_output = max(1, len(answer) // 4)
        else:
            estimated_output = 0
        return CouncilResponse(
            provider=self.name,
            model=self.model,
            text=answer,
            input_tokens=0,
            output_tokens=estimated_output,
            metadata={"cli_output_format": "plain_text", "tokens_estimated": True},
        )
1162
+
1163
+
1164
class PerplexityCliClient(CliClient):
    """Perplexity via the community `perplexity` CLI (npm package).

    This is a community-maintained wrapper around the Perplexity API
    and is **not** an official subscription transport. The CLI reads
    ``PERPLEXITY_API_KEY`` from its own environment, so each call is
    paid per-token exactly as ``PerplexityClient`` (api transport)
    would be. ``mode: cli`` is an ergonomic shortcut; it does NOT
    bypass the USD cost gate.

    Invokes ``perplexity -p <prompt>``. Output is plain text with no
    JSON envelope. Token counts are estimated heuristically for the
    audit trail; the pre-call cost gate uses the orchestrator's
    prompt-side estimate.
    """

    name = "perplexity"
    default_binary = "perplexity"
    billable = True  # community CLI consumes an API key — billable applies

    _AUTH_FAILURE_PATTERNS = (
        *CliClient._AUTH_FAILURE_PATTERNS,
        "perplexity_api_key",
        "401",
        "unauthorized",
    )

    def __init__(
        self,
        *,
        model: str = DEFAULT_PERPLEXITY_MODEL,
        binary: str | None = None,
        timeout_seconds: float = DEFAULT_CLI_TIMEOUT_SECONDS,
        max_calls_per_day: int | None = None,
        warn_at: float = 0.8,
        cli_calls_path: Path | None = None,
    ):
        """Forward every keyword argument unchanged to the parent constructor."""
        forwarded = {
            "model": model,
            "binary": binary,
            "timeout_seconds": timeout_seconds,
            "max_calls_per_day": max_calls_per_day,
            "warn_at": warn_at,
            "cli_calls_path": cli_calls_path,
        }
        super().__init__(**forwarded)

    def _build_command(
        self, system_prompt: str, user_prompt: str, max_tokens: int  # noqa: ARG002
    ) -> list[str]:
        """Build the ``perplexity -p <prompt>`` argv, adding ``--model`` when a model is set."""
        # NOTE(review): ``system_prompt`` is not forwarded to the CLI here —
        # confirm this is intended.
        model_args = ["--model", self.model] if self.model else []
        return [self.binary, "-p", user_prompt, *model_args]

    def _parse_output(self, stdout: str, stderr: str) -> CouncilResponse:  # noqa: ARG002
        """Wrap the CLI's plain-text stdout in a ``CouncilResponse``.

        The output token count is an estimate (one token per four
        characters, minimum 1 for non-empty text); input tokens are
        reported as 0.
        """
        answer = stdout.strip()
        if answer:
            estimated_output = max(1, len(answer) // 4)
        else:
            estimated_output = 0
        return CouncilResponse(
            provider=self.name,
            model=self.model,
            text=answer,
            input_tokens=0,
            output_tokens=estimated_output,
            metadata={"cli_output_format": "plain_text", "tokens_estimated": True},
        )
1223
+
1224
+
272
1225
  # ── Manual mode (Phase 2b) ───────────────────────────────────────────
273
1226
 
274
1227
 
@@ -307,6 +1260,7 @@ class ManualClient(ExternalAIClient):
307
1260
  """
308
1261
 
309
1262
  billable = False
1263
+ transport = "manual"
310
1264
 
311
1265
  def __init__(
312
1266
  self,