PyPI - agentshore - Versions diffs - 0.3.2__py3-none-any.whl - Mend

agentshore 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (305) hide show

agentshore/__init__.py +10 -0
agentshore/__main__.py +7 -0
agentshore/agents/__init__.py +14 -0
agentshore/agents/_jsonl.py +117 -0
agentshore/agents/_selection.py +247 -0
agentshore/agents/auth_probe.py +241 -0
agentshore/agents/capabilities.py +55 -0
agentshore/agents/circuit_breaker.py +116 -0
agentshore/agents/cli_agent.py +1763 -0
agentshore/agents/cli_grok.py +215 -0
agentshore/agents/context_writer.py +41 -0
agentshore/agents/costs.py +45 -0
agentshore/agents/git_auth_probe.py +293 -0
agentshore/agents/handle.py +285 -0
agentshore/agents/health.py +158 -0
agentshore/agents/identity.py +1009 -0
agentshore/agents/manager.py +627 -0
agentshore/agents/model_catalog.py +222 -0
agentshore/agents/model_tiers.py +91 -0
agentshore/agents/pricing.py +88 -0
agentshore/agents/registry.py +43 -0
agentshore/agents/worktree/__init__.py +96 -0
agentshore/agents/worktree/allocator.py +842 -0
agentshore/agents/worktree/manager.py +815 -0
agentshore/agents/worktree/reaper.py +417 -0
agentshore/agents/worktree/registry.py +158 -0
agentshore/agents/worktree/rekey.py +214 -0
agentshore/archive.py +136 -0
agentshore/availability.py +172 -0
agentshore/beads/__init__.py +769 -0
agentshore/beads/downloader.py +286 -0
agentshore/beads/setup.py +195 -0
agentshore/budget.py +233 -0
agentshore/cli/__init__.py +47 -0
agentshore/cli/agent_select.py +508 -0
agentshore/cli/caffeinate.py +53 -0
agentshore/cli/commands/__init__.py +3 -0
agentshore/cli/commands/add_budget.py +118 -0
agentshore/cli/commands/dashboard.py +137 -0
agentshore/cli/commands/identity.py +85 -0
agentshore/cli/commands/init.py +359 -0
agentshore/cli/commands/reload_config.py +43 -0
agentshore/cli/commands/start.py +373 -0
agentshore/cli/commands/stop.py +155 -0
agentshore/cli/commands/trusted_ids.py +198 -0
agentshore/cli/constants.py +36 -0
agentshore/cli/helpers.py +280 -0
agentshore/cli/identity_helpers.py +167 -0
agentshore/cli/runtime.py +750 -0
agentshore/cli_helpers.py +259 -0
agentshore/command.py +382 -0
agentshore/config/__init__.py +387 -0
agentshore/config/_parsers.py +1148 -0
agentshore/config/budget_writer.py +62 -0
agentshore/config/coerce.py +15 -0
agentshore/config/models.py +598 -0
agentshore/config/yaml_io.py +113 -0
agentshore/core/__init__.py +15 -0
agentshore/core/base.py +819 -0
agentshore/core/branch_sync.py +220 -0
agentshore/core/context.py +56 -0
agentshore/core/experience_recorder.py +244 -0
agentshore/core/git_safety.py +562 -0
agentshore/core/github_syncer.py +143 -0
agentshore/core/helpers.py +239 -0
agentshore/core/main_repo_guard.py +53 -0
agentshore/core/mixins/__init__.py +10 -0
agentshore/core/mixins/completion.py +1595 -0
agentshore/core/mixins/dispatch.py +902 -0
agentshore/core/mixins/drain.py +724 -0
agentshore/core/mixins/lifecycle.py +302 -0
agentshore/core/mixins/loop.py +1262 -0
agentshore/core/mixins/snapshots.py +580 -0
agentshore/core/mixins/state.py +614 -0
agentshore/core/orchestrator.py +484 -0
agentshore/core/override_queue.py +61 -0
agentshore/core/phases.py +1202 -0
agentshore/core/progress_monitor.py +76 -0
agentshore/core/recovery_tracker.py +77 -0
agentshore/core/session_runtime.py +169 -0
agentshore/core/tick_action.py +100 -0
agentshore/core/trunk_artifacts.py +230 -0
agentshore/core/velocity_tracker.py +122 -0
agentshore/core/wedge_signals.py +419 -0
agentshore/dashboard/__init__.py +7 -0
agentshore/dashboard/bridge.py +645 -0
agentshore/dashboard/lifecycle.py +78 -0
agentshore/dashboard/static/assets/claude-large-humanoid-BaLULVPd.png +0 -0
agentshore/dashboard/static/assets/claude-medium-humanoid-DbrJlRVD.png +0 -0
agentshore/dashboard/static/assets/claude-small-ball-D2RNLllT.png +0 -0
agentshore/dashboard/static/assets/codex-large-humanoid-xUW_cD7j.png +0 -0
agentshore/dashboard/static/assets/codex-medium-humanoid-DPb95Glt.png +0 -0
agentshore/dashboard/static/assets/codex-small-ball-De43mzci.png +0 -0
agentshore/dashboard/static/assets/gemini-large-humanoid-C58wuHUB.png +0 -0
agentshore/dashboard/static/assets/gemini-medium-humanoid-BMWHKEBk.png +0 -0
agentshore/dashboard/static/assets/gemini-small-ball-BPV5H2_p.png +0 -0
agentshore/dashboard/static/assets/grok-large-humanoid-CXhPsrU_.png +0 -0
agentshore/dashboard/static/assets/grok-medium-humanoid-B4bq4AQ8.png +0 -0
agentshore/dashboard/static/assets/grok-small-ball-nhsQ2q79.png +0 -0
agentshore/dashboard/static/assets/index-B-T4jPYV.css +1 -0
agentshore/dashboard/static/assets/index-S2Ik8eSY.js +9 -0
agentshore/dashboard/static/index.html +13 -0
agentshore/data/__init__.py +41 -0
agentshore/data/bootstrap_policy.pt +0 -0
agentshore/data/corruption_evidence.py +235 -0
agentshore/data/integrity.py +460 -0
agentshore/data/migrations/__init__.py +79 -0
agentshore/data/models.py +353 -0
agentshore/data/schema.sql +364 -0
agentshore/data/store/__init__.py +87 -0
agentshore/data/store/base.py +81 -0
agentshore/data/store/core.py +381 -0
agentshore/data/store/helpers.py +220 -0
agentshore/data/store/mixins/__init__.py +1 -0
agentshore/data/store/mixins/agents.py +135 -0
agentshore/data/store/mixins/archive.py +61 -0
agentshore/data/store/mixins/branch_activity.py +63 -0
agentshore/data/store/mixins/external_mutations.py +111 -0
agentshore/data/store/mixins/feedback.py +51 -0
agentshore/data/store/mixins/issues.py +264 -0
agentshore/data/store/mixins/learnings.py +84 -0
agentshore/data/store/mixins/plays.py +265 -0
agentshore/data/store/mixins/pull_requests.py +306 -0
agentshore/data/store/mixins/review_patterns.py +111 -0
agentshore/data/store/mixins/reviews.py +151 -0
agentshore/data/store/mixins/rl.py +129 -0
agentshore/data/store/mixins/scope.py +40 -0
agentshore/data/store/mixins/sessions.py +126 -0
agentshore/data/store/mixins/trajectory.py +60 -0
agentshore/data/store/mixins/work_claims.py +458 -0
agentshore/data/store/mixins/worktrees.py +366 -0
agentshore/data/store/rows.py +409 -0
agentshore/environment.py +17 -0
agentshore/errors.py +308 -0
agentshore/github/__init__.py +7 -0
agentshore/github/adapter.py +695 -0
agentshore/github/labels.py +86 -0
agentshore/github/pr_links.py +153 -0
agentshore/github/trust.py +103 -0
agentshore/identity_names.py +110 -0
agentshore/identity_wizard/__init__.py +30 -0
agentshore/identity_wizard/gh_accounts.py +87 -0
agentshore/identity_wizard/keychain.py +70 -0
agentshore/identity_wizard/report.py +204 -0
agentshore/identity_wizard/wizard.py +626 -0
agentshore/identity_wizard/yaml_patch.py +213 -0
agentshore/ipc/__init__.py +9 -0
agentshore/ipc/commands.py +114 -0
agentshore/ipc/provider.py +186 -0
agentshore/ipc/serializer.py +528 -0
agentshore/ipc/server.py +296 -0
agentshore/ipc/state_writer.py +255 -0
agentshore/ipc/wire.py +49 -0
agentshore/keyring_child.py +131 -0
agentshore/learnings.py +152 -0
agentshore/logging.py +136 -0
agentshore/paths.py +53 -0
agentshore/platform_compat.py +38 -0
agentshore/play_pacing.py +7 -0
agentshore/play_rules.py +80 -0
agentshore/plays/__init__.py +10 -0
agentshore/plays/_publish_reconciler.py +286 -0
agentshore/plays/base.py +146 -0
agentshore/plays/candidates.py +1466 -0
agentshore/plays/dispatch.py +464 -0
agentshore/plays/executor.py +1390 -0
agentshore/plays/internal/__init__.py +3 -0
agentshore/plays/internal/base.py +62 -0
agentshore/plays/internal/end_agent.py +116 -0
agentshore/plays/internal/end_session.py +54 -0
agentshore/plays/internal/instantiate_agent.py +225 -0
agentshore/plays/internal/reserved_action.py +49 -0
agentshore/plays/internal/take_break.py +184 -0
agentshore/plays/override.py +49 -0
agentshore/plays/registry.py +140 -0
agentshore/plays/resolver.py +707 -0
agentshore/plays/scope.py +80 -0
agentshore/plays/selector.py +54 -0
agentshore/plays/skill_backed/__init__.py +7 -0
agentshore/plays/skill_backed/_merge_reconcile.py +149 -0
agentshore/plays/skill_backed/base.py +619 -0
agentshore/plays/skill_backed/calibrate_alignment.py +45 -0
agentshore/plays/skill_backed/cleanup.py +45 -0
agentshore/plays/skill_backed/code_review.py +238 -0
agentshore/plays/skill_backed/design_audit.py +140 -0
agentshore/plays/skill_backed/gates.py +339 -0
agentshore/plays/skill_backed/groom_backlog.py +119 -0
agentshore/plays/skill_backed/issue_pickup.py +237 -0
agentshore/plays/skill_backed/merge_pr.py +256 -0
agentshore/plays/skill_backed/prune.py +57 -0
agentshore/plays/skill_backed/reconcile_state.py +83 -0
agentshore/plays/skill_backed/refine_tasks.py +60 -0
agentshore/plays/skill_backed/run_qa.py +41 -0
agentshore/plays/skill_backed/seed_project.py +196 -0
agentshore/plays/skill_backed/systematic_debugging.py +32 -0
agentshore/plays/skill_backed/unblock_pr.py +165 -0
agentshore/plays/skill_backed/write_plan.py +33 -0
agentshore/power.py +246 -0
agentshore/pr_state.py +123 -0
agentshore/reports/__init__.py +7 -0
agentshore/reports/_aggregations.py +940 -0
agentshore/reports/_loop_incidents.py +142 -0
agentshore/reports/_repo_url.py +66 -0
agentshore/reports/collector.py +279 -0
agentshore/reports/generator.py +159 -0
agentshore/reports/static/__init__.py +1 -0
agentshore/reports/static/chart.min.js +20 -0
agentshore/reports/templates/archive_comparison.html.j2 +238 -0
agentshore/reports/templates/base.html.j2 +102 -0
agentshore/reports/templates/components/charts.html.j2 +88 -0
agentshore/reports/templates/components/tables.html.j2 +195 -0
agentshore/reports/templates/end_session_report.html.j2 +742 -0
agentshore/reports/templates/progress_report.html.j2 +79 -0
agentshore/reports/templates/session_summary.html.j2 +375 -0
agentshore/reports/types.py +297 -0
agentshore/result_parser.py +315 -0
agentshore/rl/__init__.py +58 -0
agentshore/rl/action_space.py +30 -0
agentshore/rl/checkpoint_store.py +255 -0
agentshore/rl/cold_start.py +134 -0
agentshore/rl/config_head.py +65 -0
agentshore/rl/constants.py +10 -0
agentshore/rl/eligibility.py +1052 -0
agentshore/rl/experience.py +198 -0
agentshore/rl/mask.py +639 -0
agentshore/rl/mask_reason.py +129 -0
agentshore/rl/metrics.py +494 -0
agentshore/rl/observation.py +535 -0
agentshore/rl/policy.py +291 -0
agentshore/rl/replay.py +104 -0
agentshore/rl/reward.py +394 -0
agentshore/rl/selector.py +1089 -0
agentshore/rl/training.py +242 -0
agentshore/seed_input.py +122 -0
agentshore/session/__init__.py +3 -0
agentshore/session/bootstrap.py +559 -0
agentshore/session_path.py +883 -0
agentshore/sidecar/__init__.py +7 -0
agentshore/sidecar/__main__.py +7 -0
agentshore/sidecar/agent_auth.py +126 -0
agentshore/sidecar/agents.py +277 -0
agentshore/sidecar/archive_rpc.py +225 -0
agentshore/sidecar/build_id.py +58 -0
agentshore/sidecar/config.py +91 -0
agentshore/sidecar/embedded_bridge.py +133 -0
agentshore/sidecar/esr.py +56 -0
agentshore/sidecar/handshake.py +97 -0
agentshore/sidecar/identities.py +577 -0
agentshore/sidecar/identity_config.py +319 -0
agentshore/sidecar/notification_emitters.py +71 -0
agentshore/sidecar/project.py +796 -0
agentshore/sidecar/recents.py +171 -0
agentshore/sidecar/server.py +1664 -0
agentshore/sidecar/session_lifecycle.py +894 -0
agentshore/sidecar/yaml_edits.py +119 -0
agentshore/skills/__init__.py +231 -0
agentshore/skills/templates/agentshore-calibrate-alignment/SKILL.md +58 -0
agentshore/skills/templates/agentshore-cleanup/SKILL.md +56 -0
agentshore/skills/templates/agentshore-code-review/SKILL.md +85 -0
agentshore/skills/templates/agentshore-design-audit/SKILL.md +88 -0
agentshore/skills/templates/agentshore-groom-backlog/SKILL.md +117 -0
agentshore/skills/templates/agentshore-issue-pickup/SKILL.md +87 -0
agentshore/skills/templates/agentshore-merge-pr/SKILL.md +36 -0
agentshore/skills/templates/agentshore-prune/SKILL.md +57 -0
agentshore/skills/templates/agentshore-reconcile-state/SKILL.md +97 -0
agentshore/skills/templates/agentshore-refine-tasks/SKILL.md +93 -0
agentshore/skills/templates/agentshore-run-qa/SKILL.md +82 -0
agentshore/skills/templates/agentshore-seed-project/SKILL.md +111 -0
agentshore/skills/templates/agentshore-systematic-debugging/SKILL.md +43 -0
agentshore/skills/templates/agentshore-unblock-pr/SKILL.md +72 -0
agentshore/skills/templates/agentshore-write-plan/SKILL.md +95 -0
agentshore/state.py +915 -0
agentshore/subprocess_env.py +369 -0
agentshore/timelapse/__init__.py +192 -0
agentshore/timelapse/setup.py +487 -0
agentshore/ui/__init__.py +8 -0
agentshore/ui/agentshore.tcss +114 -0
agentshore/ui/alignment_levels.py +14 -0
agentshore/ui/app.py +461 -0
agentshore/ui/format.py +10 -0
agentshore/ui/play_labels.py +40 -0
agentshore/ui/provider.py +67 -0
agentshore/ui/screens/__init__.py +21 -0
agentshore/ui/screens/agent_detail.py +85 -0
agentshore/ui/screens/dashboard.py +191 -0
agentshore/ui/screens/escalation.py +88 -0
agentshore/ui/screens/goals.py +59 -0
agentshore/ui/screens/help.py +42 -0
agentshore/ui/screens/issues.py +114 -0
agentshore/ui/screens/shutdown.py +161 -0
agentshore/ui/screens/startup.py +105 -0
agentshore/ui/widgets/__init__.py +3 -0
agentshore/ui/widgets/agent_panel.py +165 -0
agentshore/ui/widgets/alert_bar.py +44 -0
agentshore/ui/widgets/alignment.py +67 -0
agentshore/ui/widgets/budget.py +83 -0
agentshore/ui/widgets/play_history.py +93 -0
agentshore/ui/widgets/rl_state.py +63 -0
agentshore/ui/widgets/work_queue.py +54 -0
agentshore/utils.py +10 -0
agentshore-0.3.2.dist-info/METADATA +125 -0
agentshore-0.3.2.dist-info/RECORD +305 -0
agentshore-0.3.2.dist-info/WHEEL +4 -0
agentshore-0.3.2.dist-info/entry_points.txt +2 -0
agentshore-0.3.2.dist-info/licenses/LICENSE +21 -0

agentshore/__init__.py ADDED Viewed

@@ -0,0 +1,10 @@
+"""AgentShore — RL-based multi-agent coding orchestrator."""
+from __future__ import annotations
+from importlib.metadata import PackageNotFoundError, version
+# Resolve the version from the installed "agentshore" distribution metadata;
+# fall back to a placeholder when no such distribution can be found.
+try:
+    __version__ = version("agentshore")
+except PackageNotFoundError:
+    __version__ = "0.0.0"

agentshore/__main__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""Allow running as `python -m agentshore`."""
+from __future__ import annotations
+from agentshore.cli import main
+# Deliberately unguarded: this module only exists to hand control to the CLI
+# entry point imported above.
+main()

agentshore/agents/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""Agent manager — lifecycle and CLI subprocess management."""
+from __future__ import annotations
+from agentshore.agents.capabilities import AGENT_CAPABILITIES
+from agentshore.agents.handle import AgentHandle, AgentInvocationResult
+from agentshore.agents.manager import AgentManager
+# Names re-exported by the ``agentshore.agents`` package (all imported above).
+__all__ = [
+    "AGENT_CAPABILITIES",
+    "AgentHandle",
+    "AgentInvocationResult",
+    "AgentManager",
+]

agentshore/agents/_jsonl.py ADDED Viewed

@@ -0,0 +1,117 @@
+"""Shared JSONL / usage-accounting primitives for the CLI agent adapters.
+The CLI agents (Claude Code, Codex, Gemini, Grok) all emit JSONL on stdout and
+share the same token-usage bookkeeping. These primitives used to live in
+``cli_agent``; ``cli_grok`` imported them from there while ``cli_agent``
+lazily imported ``cli_grok`` back — a circular edge that forced two
+lazy-import wrappers (issue: TNQA finding #6). Hoisting the shared pieces into
+this leaf module breaks the cycle: both adapters import from here, and neither
+imports the other for these helpers.
+"""
+from __future__ import annotations
+import json
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+@dataclass(frozen=True, slots=True)
+class _UsageTotals:
+    """Immutable bundle of token-usage counters for one CLI agent run.
+    Every counter defaults to zero, so ``_UsageTotals()`` is an empty total.
+    """
+    # Input tokens; ``_usage_totals_from_dict`` adds cache reads/writes when
+    # the reported input count does not already include them.
+    tokens_in: int = 0
+    # Output tokens, or reasoning-output tokens when no output count is given.
+    tokens_out: int = 0
+    # Input tokens served from cache (cache reads).
+    cached_tokens_in: int = 0
+    # Input tokens written to cache (cache creation).
+    cache_write_tokens_in: int = 0
+    # NOTE(review): not populated by ``_usage_totals_from_dict``; presumably
+    # set by the per-format parsers — confirm against callers.
+    turn_count: int = 0
+    # Input tokens of the latest turn when reported, otherwise ``tokens_in``.
+    max_turn_input_tokens: int = 0
+def _iter_json_events(raw: str) -> Iterator[dict[str, object]]:
+    """Lazily decode *raw* JSONL text, yielding one dict per usable line.
+    Each line is stripped of surrounding whitespace first. Blank lines, lines
+    that are not valid JSON, and lines whose JSON value is not an object are
+    silently skipped, so format-specific parsers only ever see dict events.
+    """
+    for raw_line in raw.splitlines():
+        text = raw_line.strip()
+        if text:
+            try:
+                payload = json.loads(text)
+            except json.JSONDecodeError:
+                # Undecodable line: treat it like any other non-dict payload.
+                payload = None
+            if isinstance(payload, dict):
+                yield payload
+def _usage_totals_from_dict(
+    usage: dict[str, object], *, input_includes_cache: bool
+) -> _UsageTotals:
+    """Build a ``_UsageTotals`` from one provider usage payload.
+    When *usage* nests ``total_token_usage`` and/or ``last_token_usage`` dicts,
+    the counters are read from the total (preferred) or the last-turn dict and
+    the input count is treated as already including cached tokens, whatever
+    *input_includes_cache* says. Otherwise *usage* itself is read and
+    *input_includes_cache* decides whether cache reads and writes are added on
+    top of ``input_tokens``. ``turn_count`` is left at its default.
+    """
+    cumulative = usage.get("total_token_usage")
+    latest = usage.get("last_token_usage")
+    per_turn: dict[str, object] | None = None
+    if isinstance(cumulative, dict):
+        per_turn = latest if isinstance(latest, dict) else None
+        usage = cumulative
+        input_includes_cache = True
+    elif isinstance(latest, dict):
+        usage = per_turn = latest
+        input_includes_cache = True
+    prompt = _first_int(usage, "input_tokens")
+    # Cache reads may be reported under either key; the two are summed.
+    cache_reads = _safe_int(usage.get("cached_input_tokens"))
+    cache_reads += _safe_int(usage.get("cache_read_input_tokens"))
+    cache_writes = _first_int(usage, "cache_creation_input_tokens")
+    completion = _first_int(usage, "output_tokens")
+    reasoning = _first_int(usage, "reasoning_output_tokens")
+    if input_includes_cache:
+        tokens_in = prompt
+    else:
+        tokens_in = prompt + cache_reads + cache_writes
+    # Fall back to reasoning tokens only when no output count was reported.
+    tokens_out = completion if completion > 0 else reasoning
+    if per_turn:
+        peak_turn_input = _safe_int(per_turn.get("input_tokens"))
+    else:
+        peak_turn_input = tokens_in
+    return _UsageTotals(
+        tokens_in=tokens_in,
+        tokens_out=tokens_out,
+        cached_tokens_in=cache_reads,
+        cache_write_tokens_in=cache_writes,
+        max_turn_input_tokens=peak_turn_input,
+    )
+def _max_usage(left: _UsageTotals, right: _UsageTotals) -> _UsageTotals:
+    """Merge two usage totals by keeping the larger value of every counter."""
+    def _peak(counter: str) -> int:
+        return max(getattr(left, counter), getattr(right, counter))
+    return _UsageTotals(
+        tokens_in=_peak("tokens_in"),
+        tokens_out=_peak("tokens_out"),
+        cached_tokens_in=_peak("cached_tokens_in"),
+        cache_write_tokens_in=_peak("cache_write_tokens_in"),
+        turn_count=_peak("turn_count"),
+        max_turn_input_tokens=_peak("max_turn_input_tokens"),
+    )
+def _first_int(values: dict[str, object], *keys: str) -> int:
+    """Return the first non-zero integer stored under any of *keys*.
+    Keys are tried in order and each value is coerced with ``_safe_int``;
+    0 is returned when no key yields a non-zero number.
+    """
+    coerced = (_safe_int(values.get(name)) for name in keys)
+    return next((number for number in coerced if number), 0)
+def _safe_int(value: object) -> int:
+    """Coerce *value* to an ``int``, returning 0 when it cannot be converted.
+    Bools, ints, floats (truncated toward zero) and integer-like strings are
+    converted. Everything else — ``None``, containers, non-numeric strings,
+    NaN and infinite floats — yields 0 so a malformed usage payload never
+    aborts parsing.
+    """
+    if isinstance(value, bool):
+        return int(value)
+    if isinstance(value, int | float | str):
+        try:
+            return int(value)
+        except (ValueError, OverflowError):
+            # ValueError: non-numeric string or NaN. OverflowError: +/-inf,
+            # which ``json.loads`` produces for ``Infinity`` and for
+            # out-of-range literals such as ``1e999``.
+            return 0
+    return 0

agentshore/agents/_selection.py ADDED Viewed

@@ -0,0 +1,247 @@
+"""Agent selection helpers — pure rule chain for the AgentManager.
+Rule chain (applied in order):
+  0a. Required-id pin (hard): if ``target_agent_id`` is set, narrow to that
+      single handle. Used by the resolver to pin code-review dispatch to a
+      specific agent whose GH identity has been verified upstream.
+  0b. Required-type pin (hard): if ``target_agent_type`` is set (and no id pin),
+      narrow to that type. Used by ``instantiate_agent`` and similar
+      type-specific plays.
+  1. Anti-confirmation bias (hard): exclude the PR author from CodeReview.
+     QA runs against the merged trunk and has no anti-confirmation; any
+     can_test agent may execute it.
+  2. Exclude list (hard): drop agent types listed in ``preferences.exclude``
+     for this play type.
+  3. Tier eligibility (hard): drop agents whose ``model_tier`` isn't in the
+     allowed set for this play type. Small tier is blocked from any coding
+     or strategic play; large tier is blocked from cheap mechanical plays
+     where the play is explicitly tier-limited.
+  4. AntiConfirmationViolation if no candidates remain after hard filters.
+  5. Branch exposure affinity (soft): promote agents with prior exposure to *branch*.
+  6. Type affinity (soft): promote agents whose type matches
+     ``preferences.affinity`` for this play type.
+  7. Tier cost (soft): prefer cheaper eligible tiers when affinity is tied.
+  8. Least-busy tiebreaker: sort by ascending task history length.
+"""
+from __future__ import annotations
+from typing import TYPE_CHECKING
+import structlog
+from agentshore.agents.model_tiers import DEFAULT_MODEL_TIER
+from agentshore.errors import AntiConfirmationViolation
+from agentshore.identity_names import same_identity
+from agentshore.state import AgentStatus, PlayType, is_agent_circuit_broken
+if TYPE_CHECKING:
+    from agentshore.agents.handle import AgentHandle
+    from agentshore.config import AgentPreferencesConfig
+_logger = structlog.get_logger(__name__)
+_REVIEW_PLAYS: frozenset[PlayType] = frozenset({PlayType.CODE_REVIEW})
+# Per-play tier eligibility. Plays not listed here accept any tier.
+# Four bands:
+#   - Cheap mechanical work (small ∪ medium): browser checks and
+#     merging already-approved PRs.
+#   - Universal (small ∪ medium ∪ large): cleanup — it's the bootstrap
+#     first-play when the backlog is large, and at that moment only the
+#     large agent has spawned. Excluding large here used to cause the
+#     bootstrap-cleanup to get skip:staffing'd on every fresh open-stocks-
+#     mcp session (seen 2026-05-22). Per the broad-bands philosophy let
+#     PPO learn tier affinity rather than pre-committing.
+#   - Coding & strategic work (medium ∪ large): anything that writes code,
+#     restructures local work, or interprets test failures. Small is too
+#     risky for downstream cost.
+#   - Heavyweight strategic / validation (large only): seed/design audits,
+#     final QA, and global calibration where medium's judgement isn't trusted
+#     to set or certify the trajectory.
+# Medium is the universal fallback for the first three bands.
+# Maps each tier-restricted play to the model tiers allowed to run it. The
+# group comments below are approximate; entries that differ from their group
+# heading carry an explicit trailing note.
+_PLAY_ALLOWED_TIERS: dict[PlayType, frozenset[str]] = {
+    # Universal — every tier.
+    PlayType.CLEANUP: frozenset({"small", "medium", "large"}),
+    # Cheap mechanical — small ∪ medium.
+    PlayType.MERGE_PR: frozenset({"small", "medium"}),
+    # Medium ∪ large — coding & strategic
+    PlayType.ISSUE_PICKUP: frozenset({"medium", "large"}),
+    PlayType.UNBLOCK_PR: frozenset({"large", "medium"}),
+    PlayType.CODE_REVIEW: frozenset({"medium", "large"}),
+    PlayType.REFINE_TASK_BREAKDOWN: frozenset({"medium", "large"}),
+    PlayType.RUN_QA: frozenset({"large"}),  # large only, despite the group heading
+    PlayType.WRITE_IMPLEMENTATION_PLAN: frozenset({"large"}),  # large only
+    PlayType.SYSTEMATIC_DEBUGGING: frozenset({"medium", "large"}),
+    # Large only — beads/design-doc audits and final validation.
+    PlayType.SEED_PROJECT: frozenset({"large"}),
+    PlayType.DESIGN_AUDIT: frozenset({"large"}),
+    PlayType.GROOM_BACKLOG: frozenset({"medium", "large"}),  # medium is allowed here
+    PlayType.CALIBRATE_ALIGNMENT: frozenset({"large"}),
+    # RECONCILE_STATE — log-parse + targeted local remediation. Doesn't need
+    # large-tier reasoning; medium suffices and is cheaper when it fires.
+    PlayType.RECONCILE_STATE: frozenset({"medium", "large"}),
+}
+def allowed_tiers_for(play_type: PlayType) -> frozenset[str] | None:
+    """Look up which model tiers may run *play_type*.
+    Returns ``None`` when the play has no entry, meaning any tier is accepted.
+    """
+    if play_type in _PLAY_ALLOWED_TIERS:
+        return _PLAY_ALLOWED_TIERS[play_type]
+    return None
+def select_agent_for(
+    play_type: PlayType,
+    handles: dict[str, AgentHandle],
+    *,
+    pr_github_author: str | None = None,
+    branch_exposure: dict[str, str] | None = None,
+    preferences: AgentPreferencesConfig | None = None,
+    branch: str | None = None,
+    required_agent_type: str | None = None,
+    required_agent_id: str | None = None,
+) -> AgentHandle:
+    """Return the best available handle for *play_type* using the rule chain.
+    Hard filters (pin, anti-confirmation, exclude list, tier) run first; the
+    survivors are then ordered by a soft score and the lowest score wins.
+    Args:
+        play_type: Play being staffed; drives the review filter, the exclude
+            and affinity lookups, and the tier table.
+        handles: Known agent handles; only those whose status is IDLE are
+            considered.
+        pr_github_author: GitHub identity of the PR author. For review plays,
+            agents whose ``github_identity`` matches it are removed; ``None``
+            disables that filter.
+        branch_exposure: Mapping of branch name to the id of the agent already
+            exposed to that branch.
+        preferences: Supplies the ``exclude`` and ``affinity`` mappings, both
+            keyed by ``play_type.value``.
+        branch: Branch the play targets, used for the exposure lookup.
+        required_agent_type: Restrict candidates to this agent type value.
+            Ignored when *required_agent_id* is given.
+        required_agent_id: Restrict candidates to this single agent id.
+    Raises ``AntiConfirmationViolation`` if all candidates are blocked by
+    hard constraints (anti-confirmation, exclude or tier rules), or if a
+    required id/type pin matches no IDLE agent.
+    Raises ``AntiConfirmationViolation`` (with a distinct message) if there
+    are no IDLE agents at all.
+    """
+    branch_exposure = branch_exposure or {}
+    # -- Step 0: pool of IDLE handles ----------------------------------------
+    candidates: list[AgentHandle] = [h for h in handles.values() if h.status == AgentStatus.IDLE]
+    if not candidates:
+        raise AntiConfirmationViolation("No IDLE agents available for selection")
+    # -- Step 0a: required-id pin (resolver-chosen reviewer) -----------------
+    # The resolver picks a specific agent for code_review based on GH identity.
+    # When that handle is no longer IDLE (raced with another dispatch), the
+    # play is requeued by the executor — we don't silently fall through to a
+    # different agent that might violate the identity invariant.
+    if required_agent_id is not None:
+        candidates = [h for h in candidates if h.agent_id == required_agent_id]
+        if not candidates:
+            raise AntiConfirmationViolation(f"Pinned agent {required_agent_id!r} is no longer IDLE")
+    # -- Step 0b: required-type constraint (instantiate_agent and similar) ---
+    elif required_agent_type is not None:
+        candidates = [h for h in candidates if h.agent_type.value == required_agent_type]
+        if not candidates:
+            raise AntiConfirmationViolation(
+                f"No IDLE agents of required type {required_agent_type!r} available"
+            )
+    # Size of the pool after pinning, reported if the hard filters empty it.
+    initial_count = len(candidates)
+    # Track which rule eliminated each candidate. Order of keys reflects
+    # filter precedence so a single log line reveals the dominant blocker.
+    eliminated: dict[str, list[str]] = {
+        "anti_confirmation": [],
+        "exclude": [],
+        "tier": [],
+    }
+    # -- Step 1: anti-confirmation hard filter --------------------------------
+    # CODE_REVIEW only: block any agent whose GH identity matches the PR author.
+    # QA runs against the merged trunk; any can_test agent is eligible.
+    # When pr_github_author is None (unknown — pre-session PR not yet refreshed),
+    # all candidates pass here and the executor's identity check acts as backstop.
+    blocked_ids: set[str] = set()
+    if play_type in _REVIEW_PLAYS and pr_github_author is not None:
+        for h in candidates:
+            if same_identity(h.github_identity, pr_github_author):
+                blocked_ids.add(h.agent_id)
+    survivors: list[AgentHandle] = []
+    for h in candidates:
+        if h.agent_id in blocked_ids:
+            eliminated["anti_confirmation"].append(h.agent_id)
+        else:
+            survivors.append(h)
+    candidates = survivors
+    # -- Step 2: exclude list hard filter ------------------------------------
+    excluded_types: set[str] = set()
+    if preferences is not None:
+        for exc_type in preferences.exclude.get(play_type.value, []):
+            excluded_types.add(exc_type)
+    if excluded_types:
+        survivors = []
+        for h in candidates:
+            if h.agent_type.value in excluded_types:
+                eliminated["exclude"].append(h.agent_id)
+            else:
+                survivors.append(h)
+        candidates = survivors
+    # -- Step 3: tier eligibility hard filter --------------------------------
+    # Plays without an entry in the tier table accept every tier; handles
+    # without a model tier are treated as DEFAULT_MODEL_TIER.
+    allowed_tiers = _PLAY_ALLOWED_TIERS.get(play_type)
+    if allowed_tiers is not None:
+        survivors = []
+        for h in candidates:
+            if (h.model_tier or DEFAULT_MODEL_TIER) not in allowed_tiers:
+                eliminated["tier"].append(h.agent_id)
+            else:
+                survivors.append(h)
+        candidates = survivors
+    # -- Step 4: fail loudly when the hard filters removed everyone ----------
+    if not candidates:
+        _logger.warning(
+            "agent_selection_blocked",
+            play_type=play_type.value,
+            eliminated=eliminated,
+            candidate_count_in=initial_count,
+        )
+        raise AntiConfirmationViolation(
+            f"All agents blocked for {play_type.value!r} — "
+            "anti-confirmation, exclude, or tier-eligibility rules eliminated all candidates"
+        )
+    # -- Steps 5-8: soft scoring (stable sort; lower score = more preferred) -
+    preferred_type: str | None = None
+    if preferences is not None:
+        preferred_type = preferences.affinity.get(play_type.value)
+    branch_exposed_ids: set[str] = set()
+    if branch is not None:
+        exposed = branch_exposure.get(branch)
+        if exposed:
+            branch_exposed_ids.add(exposed)
+    tier_rank = {"small": 0, "medium": 1, "large": 2}
+    def _score(h: AgentHandle) -> tuple[int, int, int, int, int]:
+        # Sort key compared left to right, so earlier components dominate:
+        # (circuit breaker, branch exposure, type affinity, tier cost, busyness).
+        # Circuit breaker (#22): strongly deprioritize a known-dead agent (0
+        # successes + a timeout or repeated failures) so a healthy peer always
+        # wins. Soft, not a hard filter — if every IDLE candidate is broken we
+        # still pick one rather than wedge (the play-availability gate already
+        # masks the play when no healthy capable agent exists).
+        task_history = h.task_history
+        successes = sum(1 for t in task_history if t.success)
+        failures = len(task_history) - successes
+        circuit_broken_score = (
+            1
+            if is_agent_circuit_broken(
+                tasks_completed=successes,
+                tasks_failed=failures,
+                timeout_count=h.timeout_count,
+                consecutive_timeouts=h.consecutive_timeouts,
+            )
+            else 0
+        )
+        # Branch exposure affinity: 0 if exposed to this branch, 1 otherwise
+        branch_exposure_score = 0 if h.agent_id in branch_exposed_ids else 1
+        # Type affinity: 0 if preferred type matches, 1 otherwise
+        type_score = 0 if (preferred_type and h.agent_type.value == preferred_type) else 1
+        # Tier cost: when a play accepts multiple tiers, preserve larger agents
+        # for plays that truly require them. Unknown tier names rank as medium.
+        tier_score = tier_rank.get(h.model_tier or DEFAULT_MODEL_TIER, tier_rank["medium"])
+        # Least busy: ascending task count
+        busy_score = len(h.task_history)
+        return (circuit_broken_score, branch_exposure_score, type_score, tier_score, busy_score)
+    candidates.sort(key=_score)
+    return candidates[0]

agentshore/agents/auth_probe.py ADDED Viewed

@@ -0,0 +1,241 @@
+"""Pre-launch CLI-agent backend auth probing.
+``preflight_identities`` validates the *GitHub* identity tokens a session will
+commit/merge with. It does NOT validate the *backend* auth each CLI agent uses
+to reach its model provider — e.g. the Codex CLI's cached ``chatgpt.com``
+session token, which carries a TTL and expires mid-run. When it expires the
+Codex CLI prints ``failed to renew cache TTL`` / ``failed to refresh available
+models`` to stderr and then hangs reading from stdin, so every dispatch runs to
+the full ``stream_idle_timeout`` before being killed — observed burning 16
+plays in a single session.
+This module is the single source of truth for "is agent <type>'s backend auth
+currently valid?", shared by three call sites so a green badge on the desktop
+setup screen provably means the launch gate will pass:
+* the CLI launch gate (``preflight_cli_agent_auth`` in ``session/bootstrap.py``),
+* the desktop ``session.start`` gate (a phase in ``sidecar/session_lifecycle``),
+* the desktop agents/identities setup screen (``agents.check_auth`` RPC).
+The probe is intentionally conservative: only agent types with a reliable,
+non-mutating auth-status command are probed; everything else returns
+``UNPROBEABLE`` and never blocks a launch, so this can never introduce a
+false-negative startup failure.
+"""
+from __future__ import annotations
+import os
+import shutil
+import subprocess
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+from agentshore import subprocess_env
+from agentshore.state import CLI_AGENT_TYPES, AgentType
+if TYPE_CHECKING:
+    from agentshore.config.models import AgentConfig, RuntimeConfig
+# Shared status vocabulary. The desktop setup screen and the launch gate both
+# consume these exact strings, so a status here maps 1:1 to a frontend badge.
+# Probe exited 0 and its output matched no not-authenticated marker.
+AUTH_OK = "ok"
+# Probe output matched one of ``_NOT_AUTHED_MARKERS``.
+AUTH_EXPIRED = "expired"
+# Probe did not finish within the timeout and was killed.
+AUTH_TIMEOUT = "timeout"
+# Binary not found on PATH, spawn failure, or a non-zero exit with no marker.
+AUTH_ERROR = "error"
+# The agent type has no entry in ``_PROBE_ARGV``; nothing was run.
+AUTH_UNPROBEABLE = "unprobeable"
+# Only these statuses gate a launch. ``error`` (binary missing / unexpected
+# non-zero with no auth marker) and ``timeout`` are surfaced but NOT blocking:
+# a transient probe hiccup must never strand an otherwise-fine session, and the
+# runtime auth-suppression backstop (ErrorClass.AUTH parking) catches a genuine
+# failure that slips through.
+_BLOCKING_STATUSES = frozenset({AUTH_EXPIRED})
+# Default probe timeout. Auth-status is a local credential read; 10s is ample
+# and keeps the setup screen / launch gate responsive.
+DEFAULT_PROBE_TIMEOUT_S = 10.0
+# Per-type auth-status command (args appended to the resolved binary). Only the
+# Codex CLI exposes a reliable, non-interactive, non-mutating status verb today;
+# the others fall through to UNPROBEABLE until a trustworthy command is
+# confirmed (a wrong probe that blocks launch is worse than no probe).
+_PROBE_ARGV: dict[AgentType, tuple[str, ...]] = {
+    AgentType.CODEX: ("login", "status"),
+}
+# Executable name looked up on PATH for each agent type when the caller does
+# not pass an explicit ``binary`` override to ``probe_cli_auth``.
+_DEFAULT_BINARY: dict[AgentType, str] = {
+    AgentType.CLAUDE_CODE: "claude",
+    AgentType.CODEX: "codex",
+    AgentType.GEMINI: "gemini",
+    AgentType.GROK: "grok",
+}
+# Output markers indicating the backend is NOT authenticated / the cached
+# session is dead. Matched case-insensitively against stdout+stderr. Includes
+# the Codex TTL-expiry signatures so the same vocabulary that classifies a
+# mid-run hang (ErrorClass.AUTH) also classifies a pre-launch probe.
+# Entries must be lowercase: the probe lowercases the combined output and then
+# does plain substring matching, so an entry with capitals can never match.
+# NOTE(review): "please run" is a broad substring and any match yields the
+# launch-blocking ``expired`` status — confirm it cannot appear in the output
+# of an authenticated ``codex login status``.
+_NOT_AUTHED_MARKERS: tuple[str, ...] = (
+    "not logged in",
+    "not authenticated",
+    "logged out",
+    "no credentials",
+    "please run",
+    "run `codex login`",
+    "run 'codex login'",
+    "failed to renew cache ttl",
+    "failed to refresh available models",
+)
+@dataclass(frozen=True)
+class AuthProbeResult:
+    """Immutable record of one backend-auth probe for a single agent type."""
+    # Agent type the probe was run for.
+    agent_type: AgentType
+    # One of the module-level ``AUTH_*`` status strings.
+    status: str
+    # Short human-readable explanation accompanying the status.
+    detail: str
+    @property
+    def ok(self) -> bool:
+        """Whether the result is non-blocking because auth is valid or unprobeable."""
+        current = self.status
+        return current == AUTH_OK or current == AUTH_UNPROBEABLE
+    @property
+    def blocks_launch(self) -> bool:
+        """Whether this status is one of the definitive, launch-gating failures."""
+        gating = _BLOCKING_STATUSES
+        return self.status in gating
+def _first_meaningful_line(text: str) -> str:
+    """Return the first non-blank line of *text*, stripped and capped at 200 chars.
+    Yields the empty string when *text* has no line with visible content.
+    """
+    trimmed_lines = (raw.strip() for raw in text.splitlines())
+    return next((entry[:200] for entry in trimmed_lines if entry), "")
+def probe_cli_auth(
+    agent_type: AgentType,
+    env: dict[str, str] | None = None,
+    *,
+    binary: str | None = None,
+    timeout: float = DEFAULT_PROBE_TIMEOUT_S,
+) -> AuthProbeResult:
+    """Probe one CLI agent type's backend auth via its status command.
+    Runs a short, non-mutating auth-status subprocess under the ambient
+    environment overlaid with *env*. Never raises — every failure mode maps to
+    an :class:`AuthProbeResult`. Blocking in nature (spawns the probe with
+    ``subprocess.Popen`` and waits on ``communicate``); async callers should
+    wrap it in ``asyncio.to_thread``.
+    Args:
+        agent_type: Agent type to probe. Types without a ``_PROBE_ARGV`` entry
+            return ``AUTH_UNPROBEABLE`` without spawning anything.
+        env: Extra environment variables layered over ``os.environ``.
+        binary: Executable name or path to resolve instead of the per-type
+            default from ``_DEFAULT_BINARY``.
+        timeout: Seconds to wait for the probe before killing it.
+    Returns:
+        A result whose status is ``AUTH_UNPROBEABLE`` (no probe command),
+        ``AUTH_ERROR`` (binary missing, spawn failure, or non-zero exit with no
+        auth marker), ``AUTH_TIMEOUT``, ``AUTH_EXPIRED`` (output matched a
+        not-authenticated marker) or ``AUTH_OK``.
+    """
+    argv_tail = _PROBE_ARGV.get(agent_type)
+    if argv_tail is None:
+        return AuthProbeResult(
+            agent_type, AUTH_UNPROBEABLE, "no auth-status probe for this agent type"
+        )
+    exe = binary or _DEFAULT_BINARY.get(agent_type, agent_type.value)
+    resolved = shutil.which(exe)
+    if resolved is None:
+        return AuthProbeResult(agent_type, AUTH_ERROR, f"{exe!r} not found on PATH")
+    full_env = {**os.environ, **(env or {})}
+    try:
+        # Popen (not subprocess.run) so a timeout can tree-kill: the probed CLIs
+        # (codex) are node shims that spawn children; subprocess.run's own
+        # timeout kill reaps only the direct child and leaves the node subtree
+        # alive. CREATE_NO_WINDOW + new process group (Windows; 0 elsewhere)
+        # suppresses the console flash / AV window-hooking latency this module
+        # exists to avoid and roots the child in a killable group, matching the
+        # dispatch path in cli_agent and the hardened runner in command.py.
+        proc = subprocess.Popen(  # noqa: S603 — fixed argv, resolved binary
+            [resolved, *argv_tail],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            # Pin stdin (never inherit the parent's): the desktop sidecar's
+            # stdin is the live Tauri JSON-RPC pipe, and the very CLIs we probe
+            # (codex) wedge on a contended/empty stdin. Enforced by
+            # tests/test_subprocess_stdin_guard.py.
+            stdin=subprocess.DEVNULL,
+            text=True,
+            # Undecodable bytes in the CLI's output must not surface as a
+            # UnicodeDecodeError from communicate(); replace them instead so
+            # the "never raises" contract holds.
+            errors="replace",
+            env=full_env,
+            creationflags=subprocess_env.no_window_creationflags(),
+        )
+    except (OSError, ValueError) as exc:
+        # ValueError covers invalid Popen arguments (e.g. an embedded NUL in
+        # the env overlay), which would otherwise escape as an exception.
+        return AuthProbeResult(agent_type, AUTH_ERROR, str(exc)[:200])
+    try:
+        stdout, stderr = proc.communicate(timeout=timeout)
+    except subprocess.TimeoutExpired:
+        # Kill the whole tree (codex → node), not just the direct child, so
+        # nothing lingers past the probe.
+        if proc.pid is not None:
+            subprocess_env.kill_tree_sync(proc.pid)
+        proc.kill()
+        try:
+            # Bounded drain: if a descendant survived the tree-kill and still
+            # holds the pipes open, an unbounded communicate() would hang the
+            # caller — the exact failure this probe exists to prevent.
+            proc.communicate(timeout=5.0)
+        except subprocess.TimeoutExpired:
+            # Give up on draining; the timeout result below is still accurate.
+            pass
+        return AuthProbeResult(agent_type, AUTH_TIMEOUT, f"auth probe timed out after {timeout:g}s")
+    stdout = stdout or ""
+    stderr = stderr or ""
+    # Markers are lowercase, so lowercase the haystack for a case-insensitive match.
+    combined = f"{stdout}\n{stderr}".lower()
+    # Prefer stderr for the human-readable detail; fall back to stdout.
+    detail = _first_meaningful_line(stderr) or _first_meaningful_line(stdout)
+    # Marker check comes first: a not-authenticated message wins over the exit code.
+    if any(marker in combined for marker in _NOT_AUTHED_MARKERS):
+        return AuthProbeResult(
+            agent_type, AUTH_EXPIRED, detail or "backend session not authenticated"
+        )
+    if proc.returncode != 0:
+        return AuthProbeResult(
+            agent_type,
+            AUTH_ERROR,
+            f"auth probe exited {proc.returncode}: {detail}"
+            if detail
+            else f"auth probe exited {proc.returncode}",
+        )
+    return AuthProbeResult(agent_type, AUTH_OK, "authenticated")
+def configured_cli_agent_types(cfg: RuntimeConfig) -> list[tuple[AgentType, AgentConfig]]:
+    """List the enabled CLI agent types in *cfg* as ``(type, config)`` pairs.
+    Keys of ``cfg.agents`` that are not valid :class:`AgentType` values (API
+    agents such as ``api_gpt``), types outside ``CLI_AGENT_TYPES`` and
+    disabled agents are left out. Each type appears at most once, in
+    configuration order, because a backend session token is shared by every
+    instance of a type and one probe per type is enough.
+    """
+    # Insertion-ordered mapping: the first config seen for a type wins.
+    picked: dict[AgentType, AgentConfig] = {}
+    for key, entry in cfg.agents.items():
+        try:
+            kind = AgentType(key)
+        except ValueError:
+            # Not an AgentType value, so not a CLI agent.
+            continue
+        if kind in CLI_AGENT_TYPES and entry.enabled and kind not in picked:
+            picked[kind] = entry
+    return list(picked.items())
+def probe_configured_cli_auth(cfg: RuntimeConfig) -> list[AuthProbeResult]:
+    """Run the backend-auth probe for every enabled CLI agent type in *cfg*.
+    Every probe gets the agent's resolved GitHub identity env overlay (for
+    parity with how the Agent Manager spawns it) and the agent's configured
+    ``binary`` override. Shared by the CLI and desktop launch gates. Results
+    come back in the order of ``configured_cli_agent_types``.
+    """
+    from agentshore.agents.identity import resolve_identity_env
+    def _identity_overlay(entry: AgentConfig) -> dict[str, str]:
+        # Identity resolution failures are a GitHub-token concern surfaced by
+        # preflight_identities; don't let one block the backend-auth probe.
+        try:
+            return resolve_identity_env(cfg, entry)
+        except Exception:
+            return {}
+    return [
+        probe_cli_auth(kind, _identity_overlay(entry), binary=entry.binary)
+        for kind, entry in configured_cli_agent_types(cfg)
+    ]