npm - @event4u/agent-config - Versions diffs - 2.11.0 → 2.13.0 - Mend

@event4u/agent-config 2.11.0 → 2.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (75)

package/.agent-src/commands/council/analysis.md +142 -0
package/.agent-src/commands/council/debate.md +129 -0
package/.agent-src/commands/council/default.md +8 -0
package/.agent-src/commands/council/design.md +16 -12
package/.agent-src/commands/council/optimize.md +16 -15
package/.agent-src/commands/council/pr.md +12 -12
package/.agent-src/commands/council.md +48 -2
package/.agent-src/personas/advisors/contrarian.md +95 -0
package/.agent-src/personas/advisors/executor.md +99 -0
package/.agent-src/personas/advisors/expansionist.md +98 -0
package/.agent-src/personas/advisors/first-principles.md +98 -0
package/.agent-src/personas/advisors/outsider.md +102 -0
package/.agent-src/rules/copilot-routing.md +19 -0
package/.agent-src/rules/devcontainer-routing.md +20 -0
package/.agent-src/rules/laravel-routing.md +20 -0
package/.agent-src/rules/symfony-routing.md +20 -0
package/.agent-src/skills/ai-council/SKILL.md +180 -2
package/.agent-src/skills/canvas-design/SKILL.md +132 -0
package/.agent-src/skills/canvas-design/evals/triggers.json +16 -0
package/.agent-src/skills/copilot-config/SKILL.md +1 -1
package/.agent-src/skills/devcontainer/SKILL.md +1 -1
package/.agent-src/skills/doc-coauthoring/SKILL.md +129 -0
package/.agent-src/skills/doc-coauthoring/evals/triggers.json +16 -0
package/.agent-src/skills/laravel/SKILL.md +1 -1
package/.agent-src/skills/project-analysis-core/SKILL.md +1 -1
package/.agent-src/skills/project-analyzer/SKILL.md +1 -1
package/.agent-src/skills/skill-writing/SKILL.md +101 -16
package/.agent-src/skills/sql-writing/SKILL.md +1 -1
package/.agent-src/skills/symfony-workflow/SKILL.md +1 -1
package/.agent-src/skills/universal-project-analysis/SKILL.md +1 -1
package/.agent-src/templates/agents/agent-project-settings.example.yml +1 -1
package/.claude-plugin/marketplace.json +5 -1
package/AGENTS.md +1 -1
package/CHANGELOG.md +78 -0
package/CONTRIBUTING.md +5 -0
package/README.md +3 -3
package/config/agent-settings.template.yml +5 -84
package/docs/architecture/multi-tool-projection.md +53 -0
package/docs/architecture/{compression.md → source-projection.md} +21 -3
package/docs/architecture.md +6 -6
package/docs/catalog.md +21 -11
package/docs/contracts/adr-architectural-consensus-mechanism.md +67 -0
package/docs/contracts/adr-level-6-productization.md +2 -2
package/docs/contracts/ai-council-config.md +186 -0
package/docs/contracts/command-clusters.md +57 -1
package/docs/contracts/multi-tool-projection-fidelity.md +109 -0
package/docs/getting-started.md +2 -2
package/package.json +1 -1
package/scripts/_archive/README.md +59 -0
package/scripts/ai_council/_default_prices.py +10 -1
package/scripts/ai_council/advisors.py +148 -0
package/scripts/ai_council/clients.py +189 -4
package/scripts/ai_council/config.py +368 -0
package/scripts/ai_council/consensus.py +290 -0
package/scripts/ai_council/orchestrator.py +634 -16
package/scripts/ai_council/prompts.py +335 -0
package/scripts/check_compressed_paths.py +6 -1
package/scripts/check_references.py +25 -0
package/scripts/ci_time_ratio.py +168 -0
package/scripts/council_cli.py +1007 -32
package/scripts/measure_projection_bytes.py +159 -0
package/scripts/measure_roadmap_trajectory.py +112 -0
package/scripts/probe_projection_fidelity.py +202 -0
package/scripts/run_skill_evals.py +185 -0
package/scripts/schemas/skill.schema.json +4 -0
package/scripts/score_skill_selection.py +198 -0
package/scripts/skill_collision_clusters.py +162 -0
package/scripts/skill_linter.py +71 -1
/package/scripts/{_backfill_skill_domains.py → _archive/_backfill_skill_domains.py} +0 -0
/package/scripts/{_bootstrap_tier_frontmatter.py → _archive/_bootstrap_tier_frontmatter.py} +0 -0
/package/scripts/{_p43_bodies.py → _archive/_p43_bodies.py} +0 -0
/package/scripts/{_p43_compress.py → _archive/_p43_compress.py} +0 -0
/package/scripts/{_p4_migrate.py → _archive/_p4_migrate.py} +0 -0
/package/scripts/{_phase2_shim_helper.py → _archive/_phase2_shim_helper.py} +0 -0
/package/scripts/{_pilot_council_question.py → _archive/_pilot_council_question.py} +0 -0

package/docs/contracts/multi-tool-projection-fidelity.md ADDED Viewed

@@ -0,0 +1,109 @@
+# Multi-Tool Projection Fidelity Contract
+**Status:** beta · **Phase 4 of [step-1-v2-feedback-followup](../../agents/roadmaps/step-1-v2-feedback-followup.md)**
+Names the **per-tool guarantees** the projection pipeline (`scripts/compress.py --sync` + `scripts/compress.py --generate-tools`) actually delivers. Byte-equivalence is not behaviour-fidelity — each consumer tool has its own frontmatter grammar, its own activation model, and its own surface for skills / rules / commands.
+## Source of truth
+Every projection starts from `.agent-src/` (compressed) which is generated from `.agent-src.uncompressed/`. The projection layer **never** writes to source; it only reads.
+## Per-tool projection map
+| Tool | Rules surface | Skills surface | Commands surface | Frontmatter grammar |
+|---|---|---|---|---|
+| **Augment** (host) | `.augment/rules/*.md` (copies; symlink opt-in via `augment.rules_use_symlinks`) | `.augment/skills/<name>/SKILL.md` (symlink → `.agent-src/skills/`) | `.augment/commands/*.md` | full source frontmatter preserved |
+| **Claude** (Code + Desktop) | `.claude/rules/*.md` | `.claude/skills/<name>/SKILL.md` | `.claude/skills/<name>/SKILL.md` (commands rendered as skills) | full source frontmatter preserved |
+| **Cursor** | `.cursor/rules/*.mdc` + legacy `.md` symlinks (130 files = 65 × 2) | **not projected** | `.cursor/commands/*.md` | `description`, `globs`, `alwaysApply` only — `triggers`, `routes_to`, `tier`, `type` are **dropped** |
+| **Windsurf** | `.windsurfrules` (single concatenated file) + `.windsurf/rules/*.md` (per-rule) | **not projected** | `.windsurf/workflows/*.md` | concatenated body; per-rule frontmatter only retained in `.windsurf/rules/`, not in the legacy `.windsurfrules` single-file |
+| **Cline** | `.clinerules/*.md` | **not projected** | **not projected** | full router frontmatter preserved (`type`, `tier`, `description`, `triggers`, `routes_to`) |
+| **Gemini** | `GEMINI.md` (single-file digest) | embedded inline | embedded inline | digest only — no per-rule frontmatter |
+| **Copilot** | `AGENTS.md` / `copilot-instructions.md` | embedded inline | embedded inline | digest only |
+`AGENTS.md` is the **tool-agnostic root pointer** and exists at workspace root regardless of which projections are enabled.
+## Fidelity guarantees per axis
+### 1. Rule body fidelity
+| Tool | Body identical to source? |
+|---|---|
+| Augment | yes (copy or symlink) |
+| Claude | yes (copy) |
+| Cursor `.mdc` | yes |
+| Cline | yes |
+| Windsurf single-file | concatenated, separator `---` between rules |
+| Windsurf per-rule | yes |
+| Gemini / Copilot digest | summarised — **no fidelity guarantee** |
+### 2. Trigger fidelity (`triggers:` keyword / `path_prefix`)
+| Tool | `triggers:` preserved? |
+|---|---|
+| Augment, Claude, Cline, Windsurf-per-rule | **yes** — the host LLM sees the trigger set verbatim |
+| Cursor `.mdc` | **no** — Cursor's frontmatter grammar does not honour `triggers:`; activation falls back to `globs:` + `alwaysApply: <bool>` + description match |
+| Windsurf single-file `.windsurfrules` | **no** — concatenated body strips per-rule frontmatter |
+| Gemini, Copilot | **no** — digest format |
+**Consequence:** rules that depend on `triggers:` for activation (tier-2a path-prefix routing, tier-3 keyword routing) **silently degrade on Cursor and on the Windsurf single-file**. They still appear in body, but the host must infer activation from prose.
+### 3. `routes_to:` fidelity
+Same matrix as `triggers:` — preserved on Augment, Claude, Cline, Windsurf-per-rule; **dropped** on Cursor `.mdc` and Windsurf single-file.
+**Consequence:** the four tier-3 routing rules (`laravel-routing`, `symfony-routing`, `copilot-routing`, `devcontainer-routing`) added in Phase 3.3 will route deterministically on Augment / Claude / Cline; on Cursor / Windsurf-single-file the host must rely on description matching alone.
+### 4. Skill surface
+Cursor, Windsurf, Cline, Gemini, Copilot have **no native skill surface**. Skills are projected only for Augment and Claude. Consumers on the other tools see skill content only indirectly (via rule bodies that cite skills, or via the catalogue in `AGENTS.md`).
+### 5. Command surface
+| Tool | Where commands appear |
+|---|---|
+| Augment | `.augment/commands/*.md` (native slash-command surface) |
+| Claude | `.claude/skills/<command>/SKILL.md` (commands rendered as skills with `disable-model-invocation: true`) |
+| Cursor | `.cursor/commands/*.md` (106 files) |
+| Windsurf | `.windsurf/workflows/*.md` (106 files) |
+| Cline | none |
+| Gemini, Copilot | listed only inside `AGENTS.md` / `GEMINI.md` digest |
+## Automated probe — `task lint-projection-fidelity`
+`scripts/probe_projection_fidelity.py` reads `tests/fixtures/projection_fidelity/fixtures.yml` and asserts the per-tool guarantees above against the actual projected trees. The fixture covers five representative artefacts:
+| Fixture entry | Tier | Stress-tests |
+|---|---|---|
+| `rule:non-destructive-by-default` | kernel | always-active body fidelity across all five rule surfaces |
+| `rule:laravel-translations` | tier-2a | `path_prefix:` trigger preservation (Cline) vs drop (Cursor) |
+| `rule:laravel-routing` | tier-3 | `routes_to:` preservation (Cline) vs drop (Cursor, Windsurf-single) |
+| `skill:laravel` | skill | Augment + Claude only; rationale for absence on others |
+| `command:commit` | command | per-tool command surface divergence |
+Run: `python3 scripts/probe_projection_fidelity.py` — exits non-zero on any divergence. Report at `agents/reports/projection-fidelity.json`.
+## Known divergences (do not file as bugs)
+These are **architectural facts**, not regressions. They are documented so installers and consumers know what to expect.
+1. **Cursor `.mdc` drops router metadata.** Cursor's third-party rule format only honours `description`, `globs`, `alwaysApply`. Adding `triggers:` or `routes_to:` to a Cursor rule has no effect at activation time. The body still loads when the description matches; the deterministic routing layer does not.
+2. **Windsurf single-file (`.windsurfrules`) strips per-rule frontmatter.** Legacy compatibility surface. The new `.windsurf/rules/*.md` per-rule files preserve the full frontmatter — consumers should prefer those.
+3. **Skills do not project to Cursor / Windsurf / Cline / Gemini / Copilot.** These tools have no native skill loader. Skill content reaches consumers indirectly via rule bodies and the `AGENTS.md` catalogue.
+4. **Augment historically did not load symlinked rules.** Default is to **copy** rules into `.augment/rules/`. Opt into symlinks via `augment.rules_use_symlinks: true` in `.agent-settings.yml`.
+5. **`task generate-tools` does not refresh `.augment/rules/`.** Only `task sync` (== `scripts/compress.py --sync`) copies rules into the Augment tree. Investigators who edit a rule, run only `generate-tools`, and then `ls .augment/rules/` will see stale state.
+## Acceptance criteria for this contract
+- [x] Fixture under `tests/fixtures/projection_fidelity/`
+- [x] Probe script under `scripts/probe_projection_fidelity.py`
+- [x] Report under `agents/reports/projection-fidelity.json`
+- [x] Per-tool guarantee table above
+- [x] Known-divergence list above
+## Related
+- [`source-projection`](../architecture/source-projection.md) — pipeline A (source compression)
+- [`augment-projection`](../architecture/augment-projection.md) — pipeline B (Augment-specific)
+- [`multi-tool-projection`](../architecture/multi-tool-projection.md) — pipeline C (the per-tool emitters)
+- [`rule-router`](rule-router.md) — the `triggers:` / `routes_to:` grammar this contract pins
+- [`agents/council-sessions/2026-05-14-v2-analysis/feedback/09-cross-tool-projection-fidelity.md`](../../agents/council-sessions/2026-05-14-v2-analysis/feedback/09-cross-tool-projection-fidelity.md) — origin council feedback

package/docs/getting-started.md CHANGED Viewed

@@ -106,7 +106,7 @@ Your agent is now:
 - **Respecting your codebase** — no conflicting patterns
 - **Following standards** — consistent code quality
-This is enforced automatically by 61 rules. No configuration needed.
+This is enforced automatically by 65 rules. No configuration needed.
 ---
@@ -146,7 +146,7 @@ Your agent now understands slash commands:
 | `/quality-fix` | Run and fix all quality checks |
 | `/chat-history` | Inspect the persistent chat-history log (read-only `show`) |
-→ [Browse all 106 active commands](../.agent-src/commands/)
+→ [Browse all 108 active commands](../.agent-src/commands/)
 ---

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "name": "@event4u/agent-config",
-    "version": "2.11.0",
+    "version": "2.13.0",
     "description": "Shared agent configuration \u2014 skills, rules, commands, guidelines, and templates for AI coding tools",
     "license": "MIT",
     "private": false,

package/scripts/_archive/README.md ADDED Viewed

@@ -0,0 +1,59 @@
+# Script Archive — One-Shot Migrations
+This directory preserves migration / bootstrap / back-fill scripts that have
+already run to completion and are no longer invoked by any productive code
+path. They are kept as forensic reference, mirroring the archival convention
+used by [`agents/roadmaps/archive/`](../../agents/roadmaps/archive/).
+**Do not run these scripts.** They are one-shot transformations whose
+target state is already the working tree. Re-running them on the current
+codebase is undefined behaviour.
+## Provenance
+Archived 2026-05-14 as part of [`agents/roadmaps/step-1-v2-feedback-followup.md`](../../agents/roadmaps/step-1-v2-feedback-followup.md)
+Phase 1 Step 3, addressing audit finding F3 / council finding C3 from
+[`agents/council-sessions/2026-05-14-v2-analysis/feedback/03-migration-scripts-archival.md`](../../agents/council-sessions/2026-05-14-v2-analysis/feedback/03-migration-scripts-archival.md).
+## Inventory
+| Script | Migration / phase served | What it did |
+|---|---|---|
+| [`_backfill_skill_domains.py`](_backfill_skill_domains.py) | B3 domain back-fill | Injected `domain:` frontmatter into every `SKILL.md`. Source of truth now lives in each skill's frontmatter directly. |
+| [`_bootstrap_tier_frontmatter.py`](_bootstrap_tier_frontmatter.py) | Tier-frontmatter bootstrap | Injected `tier: N` frontmatter into every slash command during the kernel / tier-1 / tier-2 routing introduction. |
+| [`_p43_bodies.py`](_p43_bodies.py) | Phase 4.3 — rule-body compression | Wrote compressed rule bodies after `_p43_compress.py` produced the manifest. Paired with `_p43_compress.py`. |
+| [`_p43_compress.py`](_p43_compress.py) | Phase 4.3 — rule-body compression | Surgical compression of 22 `compress-and-keep` auto-rules; produced the manifest consumed by `_p43_bodies.py`. |
+| [`_p4_migrate.py`](_p4_migrate.py) | Phase 4.1 + 4.2 — rule reclassification | Migrated rules into the skill / guideline / command / contract-stub split that the package ships today. |
+| [`_phase2_shim_helper.py`](_phase2_shim_helper.py) | Phase 2 — deprecation shim | One-shot helper that injected `superseded_by:` + `deprecated_in:` + deprecation warning into rules retired during Phase 2. |
+| [`_pilot_council_question.py`](_pilot_council_question.py) | Phase 1 pilot — kernel-membership council prep | Built the Phase-1 council question file used for the kernel-membership R1/R2 cross-check. The resulting council artefacts live under `agents/council-sessions/20260506T*`. |
+## Why two candidate scripts stayed live and were NOT archived
+The 2026-05-14 audit (F3) listed 9 candidate scripts. Two of those turned out
+to have productive (i.e. not merely self-referential) references and remain in `scripts/`:
+- **`scripts/_emit_domain_table.py`** — cited as the regeneration command in
+  [`docs/contracts/skill-domains.md`](../../docs/contracts/skill-domains.md)
+  ("regenerate via `python3 scripts/_emit_domain_table.py`"). The
+  domain-table snapshot is a derived view that the contract doc explicitly
+  expects to be regenerable from this script.
+- **`scripts/_pilot_measure.py`** — cited by
+  [`docs/contracts/kernel-membership.md`](../../docs/contracts/kernel-membership.md)
+  as the reproducibility-verification command for the kernel pilot SHAs, and
+  its algorithm is mirrored by [`scripts/iron_law_sha.py`](../iron_law_sha.py).
+  Both productive paths assume the script remains in place.
+The audit's F3 framing ("zero productive references") was correct for the 7
+archived scripts and wrong for these 2. Recorded here so the F3 finding is
+not re-litigated without context.
+## How to restore one (if a future migration needs it)
+```bash
+git mv scripts/_archive/<script>.py scripts/
+git commit -m "chore(scripts): restore <script> for <reason>"
+```
+Restoration should come with an issue / PR explaining why the historical
+one-shot is being reused — by construction these scripts assume their
+pre-migration starting state.

package/scripts/ai_council/_default_prices.py CHANGED Viewed

@@ -17,7 +17,7 @@ from __future__ import annotations
 # YYYY-MM-DD of when this table was last hand-edited. Keep in sync with
 # the test_default_prices freshness assertion if you bump this.
-LAST_UPDATED = "2026-04-29"
+LAST_UPDATED = "2026-05-14"
 # (provider, model)  ->  (input_per_1m_usd, output_per_1m_usd)
 DEFAULT_PRICES: dict[tuple[str, str], tuple[float, float]] = {
@@ -30,6 +30,15 @@ DEFAULT_PRICES: dict[tuple[str, str], tuple[float, float]] = {
     ("openai", "gpt-4o-mini"): (0.15, 0.60),
     ("openai", "o1"): (15.00, 60.00),
     ("openai", "o3-mini"): (1.10, 4.40),
+    # ── Google Gemini ────────────────────────────────────────────────
+    ("gemini", "gemini-2.5-pro"): (1.25, 10.00),
+    ("gemini", "gemini-2.5-flash"): (0.30, 2.50),
+    # ── xAI Grok ─────────────────────────────────────────────────────
+    ("xai", "grok-4"): (3.00, 15.00),
+    ("xai", "grok-3-mini"): (0.30, 0.50),
+    # ── Perplexity ───────────────────────────────────────────────────
+    ("perplexity", "sonar-pro"): (3.00, 15.00),
+    ("perplexity", "sonar"): (1.00, 1.00),
 }

package/scripts/ai_council/advisors.py ADDED Viewed

@@ -0,0 +1,148 @@
+"""Thinking-style advisors — replace-mode call planning (Phase 6).
+When `agents/.ai-council.yml` enables an advisor (e.g. `contrarian`
+bound to `member: anthropic`), the orchestrator REPLACES the matching
+plain-member call with an advisor-persona call on the same provider.
+Same total call count as a plain run; the extra cost is bounded by the
+persona-prompt token delta.
+This module owns:
+- `AdvisorPlan`  — resolved swap for a single provider (persona text,
+  display name, optional model override).
+- `plan_advisor_swap()` — walks the enabled advisors, reads their
+  persona files, and returns the per-provider plan map consumed by
+  `orchestrator.consult()` / `estimate()` and by the CLI.
+- `resolve_persona_text()` — reads a persona file with compressed-tree
+  preference and frontmatter strip.
+Cross-validation against the members block already ran at config load
+(`config._build_config`); this module trusts that contract and only
+enforces the **one-advisor-per-provider** rule (replace-mode invariant).
+"""
+from __future__ import annotations
+import re
+from dataclasses import dataclass
+from pathlib import Path
+import yaml
+from scripts.ai_council.config import AdvisorConfig, CouncilConfigError
+@dataclass(frozen=True)
+class AdvisorPlan:
+    """Resolved advisor swap for a single provider."""
+    name: str
+    display_name: str
+    member: str
+    persona_text: str
+    model_override: str | None = None
+_FRONTMATTER_RE = re.compile(r"\A---\n(.*?)\n---\n", re.DOTALL)
+def _split_frontmatter(raw: str) -> tuple[dict, str]:
+    """Return ``(frontmatter_dict, body)``. Missing frontmatter → ``({}, raw)``."""
+    match = _FRONTMATTER_RE.match(raw)
+    if not match:
+        return {}, raw
+    try:
+        meta = yaml.safe_load(match.group(1)) or {}
+    except yaml.YAMLError:
+        meta = {}
+    if not isinstance(meta, dict):
+        meta = {}
+    body = raw[match.end():]
+    return meta, body
+def _display_name_from(advisor_name: str, frontmatter: dict) -> str:
+    """Prefer frontmatter ``role``; fall back to titleized advisor key."""
+    role = frontmatter.get("role")
+    if isinstance(role, str) and role.strip():
+        return role.strip()
+    return advisor_name.replace("-", " ").replace("_", " ").title()
+def resolve_persona_text(
+    persona_path: str,
+    repo_root: Path,
+) -> tuple[str, dict]:
+    """Read a persona file, returning ``(body, frontmatter)``.
+    Compressed tree (``.agent-src/``) wins so production runs match the
+    same projection the rest of the package consumes. Uncompressed tree
+    (``.agent-src.uncompressed/``) is the fallback for in-repo
+    development before ``task sync`` has projected the file.
+    """
+    candidates = [
+        repo_root / ".agent-src" / persona_path,
+        repo_root / ".agent-src.uncompressed" / persona_path,
+    ]
+    for candidate in candidates:
+        if candidate.exists():
+            raw = candidate.read_text(encoding="utf-8")
+            meta, body = _split_frontmatter(raw)
+            return body.strip(), meta
+    searched = "\n  - ".join(str(c) for c in candidates)
+    raise CouncilConfigError(
+        f"Persona file not found for advisor (path={persona_path!r}). "
+        f"Searched:\n  - {searched}"
+    )
+def plan_advisor_swap(
+    advisors: dict[str, AdvisorConfig],
+    repo_root: Path,
+) -> dict[str, AdvisorPlan]:
+    """Return ``{provider_name: AdvisorPlan}`` for every ENABLED advisor.
+    Two enabled advisors targeting the same provider is a
+    ``CouncilConfigError`` — replace-mode runs one advisor per provider
+    so the call plan never doubles up by accident.
+    """
+    plans: dict[str, AdvisorPlan] = {}
+    for adv in advisors.values():
+        if not adv.enabled:
+            continue
+        if adv.member in plans:
+            existing = plans[adv.member].name
+            raise CouncilConfigError(
+                f"advisors.{adv.name} and advisors.{existing} both bind "
+                f"member={adv.member!r}; only one advisor per provider "
+                f"per run (replace-mode invariant)."
+            )
+        body, meta = resolve_persona_text(adv.persona, repo_root)
+        plans[adv.member] = AdvisorPlan(
+            name=adv.name,
+            display_name=_display_name_from(adv.name, meta),
+            member=adv.member,
+            persona_text=body,
+            model_override=adv.model,
+        )
+    return plans
+def build_persona_labels(
+    plans: dict[str, AdvisorPlan],
+    members: list,
+) -> dict[str, str]:
+    """Build the peer-review ``source → display_name`` map.
+    ``source`` is the ``provider:model`` string the peer-review
+    pipeline uses for anonymisation; ``members`` is the post-swap
+    member list (model_override already applied), so the model field
+    matches what the response carries.
+    """
+    labels: dict[str, str] = {}
+    for m in members:
+        plan = plans.get(m.name)
+        if plan is None:
+            continue
+        labels[f"{m.name}:{m.model}"] = plan.display_name
+    return labels

package/scripts/ai_council/clients.py CHANGED Viewed

@@ -52,6 +52,28 @@ def _resolve_key_path(filename: str) -> Path:
 DEFAULT_ANTHROPIC_MODEL = "claude-sonnet-4-5"
 DEFAULT_OPENAI_MODEL = "gpt-4o"
+DEFAULT_GEMINI_MODEL = "gemini-2.5-pro"
+DEFAULT_XAI_MODEL = "grok-4"
+DEFAULT_PERPLEXITY_MODEL = "sonar-pro"
+#: OpenAI-API-compatible endpoints. xAI and Perplexity both expose the
+#: ``/v1/chat/completions`` shape, so their clients reuse the ``openai``
+#: SDK with a custom ``base_url``. Gemini has its own SDK (``google-genai``).
+XAI_BASE_URL = "https://api.x.ai/v1"
+PERPLEXITY_BASE_URL = "https://api.perplexity.ai"
+#: Per-call output budget when no caller-supplied value reaches `ask()`.
+#: The CLI resolves the live default from `ai_council.max_output_tokens`
+#: in `.agent-settings.yml`; this constant is only the abstract-base /
+#: direct-API fallback when nothing else is wired up.
+DEFAULT_MAX_TOKENS = 2048
+#: Expansion target when the user sets `max_output_tokens: 0` ("unlimited")
+#: in settings. Anthropic requires `max_tokens` to be a positive integer,
+#: so 0 is widened to this safe ceiling before the SDK call. Big enough
+#: for current frontier models (Sonnet/GPT-4o headroom ≥ 16k); raise
+#: explicitly in settings if a larger budget is genuinely needed.
+UNLIMITED_TOKENS_FALLBACK = 16384
 # OpenAI reasoning models (o1, o3, o4 families) reject `max_tokens` and the
 # `system` role; they require `max_completion_tokens` and accept only `user`
@@ -128,7 +150,7 @@ class ExternalAIClient(ABC):
         self,
         system_prompt: str,
         user_prompt: str,
-        max_tokens: int = 1024,
+        max_tokens: int = DEFAULT_MAX_TOKENS,
     ) -> CouncilResponse:
         """Send one independent query. Must never raise on network/API
         failure — return a `CouncilResponse` with `error` set instead.
@@ -162,7 +184,7 @@ class AnthropicClient(ExternalAIClient):
             ) from exc
         self._client = anthropic.Anthropic(api_key=api_key)
-    def ask(self, system_prompt: str, user_prompt: str, max_tokens: int = 1024) -> CouncilResponse:
+    def ask(self, system_prompt: str, user_prompt: str, max_tokens: int = DEFAULT_MAX_TOKENS) -> CouncilResponse:
         t0 = time.monotonic()
         try:
             response = self._client.messages.create(
@@ -218,7 +240,7 @@ class OpenAIClient(ExternalAIClient):
             ) from exc
         self._client = openai.OpenAI(api_key=api_key)
-    def ask(self, system_prompt: str, user_prompt: str, max_tokens: int = 1024) -> CouncilResponse:
+    def ask(self, system_prompt: str, user_prompt: str, max_tokens: int = DEFAULT_MAX_TOKENS) -> CouncilResponse:
         t0 = time.monotonic()
         kwargs: dict[str, object] = {"model": self.model}
         if _is_reasoning_model(self.model):
@@ -256,6 +278,169 @@ class OpenAIClient(ExternalAIClient):
         )
+# ── Gemini / xAI / Perplexity (Phase 0 — Step 6) ─────────────────────
+class GeminiClient(ExternalAIClient):
+    """Google Gemini via the ``google-genai`` SDK.
+    Lazy-imports ``google.genai`` in ``__init__`` (only when no client
+    is injected) so disabled members do not require the SDK to be
+    installed. Tests inject a
+    mock client shaped like ``genai.Client(api_key=...)`` —
+    ``self._client.models.generate_content(...)`` returns an object
+    with ``.text`` and ``.usage_metadata.{prompt_token_count,
+    candidates_token_count}``.
+    """
+    name = "gemini"
+    billable = True
+    def __init__(
+        self,
+        model: str = DEFAULT_GEMINI_MODEL,
+        client: object = None,
+        api_key: str | None = None,
+    ):
+        self.model = model
+        if client is not None:
+            self._client = client
+            return
+        if api_key is None:
+            raise RuntimeError(
+                "GeminiClient requires explicit api_key or injected client. "
+                "Use `api_key_ref: env:GEMINI_API_KEY` in agents/.ai-council.yml."
+            )
+        try:
+            from google import genai  # type: ignore[import-not-found]
+        except ImportError as exc:  # pragma: no cover - exercised only with real SDK
+            raise RuntimeError(
+                "google-genai package not installed. `pip install google-genai`."
+            ) from exc
+        self._client = genai.Client(api_key=api_key)
+    def ask(self, system_prompt: str, user_prompt: str, max_tokens: int = DEFAULT_MAX_TOKENS) -> CouncilResponse:
+        t0 = time.monotonic()
+        contents = f"{system_prompt}\n\n---\n\n{user_prompt}"
+        try:
+            response = self._client.models.generate_content(
+                model=self.model,
+                contents=contents,
+                config={"max_output_tokens": max_tokens},
+            )
+        except Exception as exc:  # noqa: BLE001 - normalise all SDK errors
+            return CouncilResponse(
+                provider=self.name, model=self.model, text="",
+                latency_ms=int((time.monotonic() - t0) * 1000),
+                error=f"{type(exc).__name__}: {exc}",
+            )
+        latency_ms = int((time.monotonic() - t0) * 1000)
+        text = getattr(response, "text", "") or ""
+        usage = getattr(response, "usage_metadata", None)
+        return CouncilResponse(
+            provider=self.name, model=self.model, text=text,
+            input_tokens=getattr(usage, "prompt_token_count", 0) if usage else 0,
+            output_tokens=getattr(usage, "candidates_token_count", 0) if usage else 0,
+            latency_ms=latency_ms,
+        )
+class _OpenAICompatibleClient(ExternalAIClient):
+    """Shared shape for OpenAI-API-compatible providers (xAI, Perplexity).
+    Both vendors implement ``/v1/chat/completions`` and accept the
+    ``openai`` Python SDK with a custom ``base_url``. The reasoning-
+    model branch from :class:`OpenAIClient` is intentionally omitted —
+    neither xAI nor Perplexity ships a reasoning model that requires
+    ``max_completion_tokens`` as of 2026-05-14.
+    """
+    billable = True
+    base_url: str = ""
+    def __init__(
+        self,
+        model: str,
+        client: object = None,
+        api_key: str | None = None,
+    ):
+        self.model = model
+        if client is not None:
+            self._client = client
+            return
+        if api_key is None:
+            raise RuntimeError(
+                f"{type(self).__name__} requires explicit api_key or injected client."
+            )
+        try:
+            import openai  # type: ignore[import-not-found]
+        except ImportError as exc:  # pragma: no cover - exercised only with real SDK
+            raise RuntimeError(
+                "openai package not installed. `pip install openai`."
+            ) from exc
+        self._client = openai.OpenAI(api_key=api_key, base_url=self.base_url)
+    def ask(self, system_prompt: str, user_prompt: str, max_tokens: int = DEFAULT_MAX_TOKENS) -> CouncilResponse:
+        t0 = time.monotonic()
+        try:
+            response = self._client.chat.completions.create(
+                model=self.model,
+                max_tokens=max_tokens,
+                messages=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": user_prompt},
+                ],
+            )
+        except Exception as exc:  # noqa: BLE001 - normalise all SDK errors
+            return CouncilResponse(
+                provider=self.name, model=self.model, text="",
+                latency_ms=int((time.monotonic() - t0) * 1000),
+                error=f"{type(exc).__name__}: {exc}",
+            )
+        latency_ms = int((time.monotonic() - t0) * 1000)
+        text = ""
+        choices = getattr(response, "choices", None)
+        if choices:
+            msg = getattr(choices[0], "message", None)
+            text = getattr(msg, "content", "") if msg else ""
+        usage = getattr(response, "usage", None)
+        return CouncilResponse(
+            provider=self.name, model=self.model, text=text or "",
+            input_tokens=getattr(usage, "prompt_tokens", 0) if usage else 0,
+            output_tokens=getattr(usage, "completion_tokens", 0) if usage else 0,
+            latency_ms=latency_ms,
+        )
+class XAIClient(_OpenAICompatibleClient):
+    """xAI Grok via the OpenAI-compatible endpoint at api.x.ai/v1."""
+    name = "xai"
+    base_url = XAI_BASE_URL
+    def __init__(
+        self,
+        model: str = DEFAULT_XAI_MODEL,
+        client: object = None,
+        api_key: str | None = None,
+    ):
+        super().__init__(model=model, client=client, api_key=api_key)
+class PerplexityClient(_OpenAICompatibleClient):
+    """Perplexity via the OpenAI-compatible endpoint at api.perplexity.ai."""
+    name = "perplexity"
+    base_url = PERPLEXITY_BASE_URL
+    def __init__(
+        self,
+        model: str = DEFAULT_PERPLEXITY_MODEL,
+        client: object = None,
+        api_key: str | None = None,
+    ):
+        super().__init__(model=model, client=client, api_key=api_key)
 # ── Manual mode (Phase 2b) ───────────────────────────────────────────
@@ -316,7 +501,7 @@ class ManualClient(ExternalAIClient):
         self,
         system_prompt: str,
         user_prompt: str,
-        max_tokens: int = 1024,  # noqa: ARG002 — accepted for ABC parity
+        max_tokens: int = DEFAULT_MAX_TOKENS,  # noqa: ARG002 — accepted for ABC parity
     ) -> CouncilResponse:
         t0 = time.monotonic()
         rounds: list[str] = []