agentshore 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (305) hide show
  1. agentshore/__init__.py +10 -0
  2. agentshore/__main__.py +7 -0
  3. agentshore/agents/__init__.py +14 -0
  4. agentshore/agents/_jsonl.py +117 -0
  5. agentshore/agents/_selection.py +247 -0
  6. agentshore/agents/auth_probe.py +241 -0
  7. agentshore/agents/capabilities.py +55 -0
  8. agentshore/agents/circuit_breaker.py +116 -0
  9. agentshore/agents/cli_agent.py +1763 -0
  10. agentshore/agents/cli_grok.py +215 -0
  11. agentshore/agents/context_writer.py +41 -0
  12. agentshore/agents/costs.py +45 -0
  13. agentshore/agents/git_auth_probe.py +293 -0
  14. agentshore/agents/handle.py +285 -0
  15. agentshore/agents/health.py +158 -0
  16. agentshore/agents/identity.py +1009 -0
  17. agentshore/agents/manager.py +627 -0
  18. agentshore/agents/model_catalog.py +222 -0
  19. agentshore/agents/model_tiers.py +91 -0
  20. agentshore/agents/pricing.py +88 -0
  21. agentshore/agents/registry.py +43 -0
  22. agentshore/agents/worktree/__init__.py +96 -0
  23. agentshore/agents/worktree/allocator.py +842 -0
  24. agentshore/agents/worktree/manager.py +815 -0
  25. agentshore/agents/worktree/reaper.py +417 -0
  26. agentshore/agents/worktree/registry.py +158 -0
  27. agentshore/agents/worktree/rekey.py +214 -0
  28. agentshore/archive.py +136 -0
  29. agentshore/availability.py +172 -0
  30. agentshore/beads/__init__.py +769 -0
  31. agentshore/beads/downloader.py +286 -0
  32. agentshore/beads/setup.py +195 -0
  33. agentshore/budget.py +233 -0
  34. agentshore/cli/__init__.py +47 -0
  35. agentshore/cli/agent_select.py +508 -0
  36. agentshore/cli/caffeinate.py +53 -0
  37. agentshore/cli/commands/__init__.py +3 -0
  38. agentshore/cli/commands/add_budget.py +118 -0
  39. agentshore/cli/commands/dashboard.py +137 -0
  40. agentshore/cli/commands/identity.py +85 -0
  41. agentshore/cli/commands/init.py +359 -0
  42. agentshore/cli/commands/reload_config.py +43 -0
  43. agentshore/cli/commands/start.py +373 -0
  44. agentshore/cli/commands/stop.py +155 -0
  45. agentshore/cli/commands/trusted_ids.py +198 -0
  46. agentshore/cli/constants.py +36 -0
  47. agentshore/cli/helpers.py +280 -0
  48. agentshore/cli/identity_helpers.py +167 -0
  49. agentshore/cli/runtime.py +750 -0
  50. agentshore/cli_helpers.py +259 -0
  51. agentshore/command.py +382 -0
  52. agentshore/config/__init__.py +387 -0
  53. agentshore/config/_parsers.py +1148 -0
  54. agentshore/config/budget_writer.py +62 -0
  55. agentshore/config/coerce.py +15 -0
  56. agentshore/config/models.py +598 -0
  57. agentshore/config/yaml_io.py +113 -0
  58. agentshore/core/__init__.py +15 -0
  59. agentshore/core/base.py +819 -0
  60. agentshore/core/branch_sync.py +220 -0
  61. agentshore/core/context.py +56 -0
  62. agentshore/core/experience_recorder.py +244 -0
  63. agentshore/core/git_safety.py +562 -0
  64. agentshore/core/github_syncer.py +143 -0
  65. agentshore/core/helpers.py +239 -0
  66. agentshore/core/main_repo_guard.py +53 -0
  67. agentshore/core/mixins/__init__.py +10 -0
  68. agentshore/core/mixins/completion.py +1595 -0
  69. agentshore/core/mixins/dispatch.py +902 -0
  70. agentshore/core/mixins/drain.py +724 -0
  71. agentshore/core/mixins/lifecycle.py +302 -0
  72. agentshore/core/mixins/loop.py +1262 -0
  73. agentshore/core/mixins/snapshots.py +580 -0
  74. agentshore/core/mixins/state.py +614 -0
  75. agentshore/core/orchestrator.py +484 -0
  76. agentshore/core/override_queue.py +61 -0
  77. agentshore/core/phases.py +1202 -0
  78. agentshore/core/progress_monitor.py +76 -0
  79. agentshore/core/recovery_tracker.py +77 -0
  80. agentshore/core/session_runtime.py +169 -0
  81. agentshore/core/tick_action.py +100 -0
  82. agentshore/core/trunk_artifacts.py +230 -0
  83. agentshore/core/velocity_tracker.py +122 -0
  84. agentshore/core/wedge_signals.py +419 -0
  85. agentshore/dashboard/__init__.py +7 -0
  86. agentshore/dashboard/bridge.py +645 -0
  87. agentshore/dashboard/lifecycle.py +78 -0
  88. agentshore/dashboard/static/assets/claude-large-humanoid-BaLULVPd.png +0 -0
  89. agentshore/dashboard/static/assets/claude-medium-humanoid-DbrJlRVD.png +0 -0
  90. agentshore/dashboard/static/assets/claude-small-ball-D2RNLllT.png +0 -0
  91. agentshore/dashboard/static/assets/codex-large-humanoid-xUW_cD7j.png +0 -0
  92. agentshore/dashboard/static/assets/codex-medium-humanoid-DPb95Glt.png +0 -0
  93. agentshore/dashboard/static/assets/codex-small-ball-De43mzci.png +0 -0
  94. agentshore/dashboard/static/assets/gemini-large-humanoid-C58wuHUB.png +0 -0
  95. agentshore/dashboard/static/assets/gemini-medium-humanoid-BMWHKEBk.png +0 -0
  96. agentshore/dashboard/static/assets/gemini-small-ball-BPV5H2_p.png +0 -0
  97. agentshore/dashboard/static/assets/grok-large-humanoid-CXhPsrU_.png +0 -0
  98. agentshore/dashboard/static/assets/grok-medium-humanoid-B4bq4AQ8.png +0 -0
  99. agentshore/dashboard/static/assets/grok-small-ball-nhsQ2q79.png +0 -0
  100. agentshore/dashboard/static/assets/index-B-T4jPYV.css +1 -0
  101. agentshore/dashboard/static/assets/index-S2Ik8eSY.js +9 -0
  102. agentshore/dashboard/static/index.html +13 -0
  103. agentshore/data/__init__.py +41 -0
  104. agentshore/data/bootstrap_policy.pt +0 -0
  105. agentshore/data/corruption_evidence.py +235 -0
  106. agentshore/data/integrity.py +460 -0
  107. agentshore/data/migrations/__init__.py +79 -0
  108. agentshore/data/models.py +353 -0
  109. agentshore/data/schema.sql +364 -0
  110. agentshore/data/store/__init__.py +87 -0
  111. agentshore/data/store/base.py +81 -0
  112. agentshore/data/store/core.py +381 -0
  113. agentshore/data/store/helpers.py +220 -0
  114. agentshore/data/store/mixins/__init__.py +1 -0
  115. agentshore/data/store/mixins/agents.py +135 -0
  116. agentshore/data/store/mixins/archive.py +61 -0
  117. agentshore/data/store/mixins/branch_activity.py +63 -0
  118. agentshore/data/store/mixins/external_mutations.py +111 -0
  119. agentshore/data/store/mixins/feedback.py +51 -0
  120. agentshore/data/store/mixins/issues.py +264 -0
  121. agentshore/data/store/mixins/learnings.py +84 -0
  122. agentshore/data/store/mixins/plays.py +265 -0
  123. agentshore/data/store/mixins/pull_requests.py +306 -0
  124. agentshore/data/store/mixins/review_patterns.py +111 -0
  125. agentshore/data/store/mixins/reviews.py +151 -0
  126. agentshore/data/store/mixins/rl.py +129 -0
  127. agentshore/data/store/mixins/scope.py +40 -0
  128. agentshore/data/store/mixins/sessions.py +126 -0
  129. agentshore/data/store/mixins/trajectory.py +60 -0
  130. agentshore/data/store/mixins/work_claims.py +458 -0
  131. agentshore/data/store/mixins/worktrees.py +366 -0
  132. agentshore/data/store/rows.py +409 -0
  133. agentshore/environment.py +17 -0
  134. agentshore/errors.py +308 -0
  135. agentshore/github/__init__.py +7 -0
  136. agentshore/github/adapter.py +695 -0
  137. agentshore/github/labels.py +86 -0
  138. agentshore/github/pr_links.py +153 -0
  139. agentshore/github/trust.py +103 -0
  140. agentshore/identity_names.py +110 -0
  141. agentshore/identity_wizard/__init__.py +30 -0
  142. agentshore/identity_wizard/gh_accounts.py +87 -0
  143. agentshore/identity_wizard/keychain.py +70 -0
  144. agentshore/identity_wizard/report.py +204 -0
  145. agentshore/identity_wizard/wizard.py +626 -0
  146. agentshore/identity_wizard/yaml_patch.py +213 -0
  147. agentshore/ipc/__init__.py +9 -0
  148. agentshore/ipc/commands.py +114 -0
  149. agentshore/ipc/provider.py +186 -0
  150. agentshore/ipc/serializer.py +528 -0
  151. agentshore/ipc/server.py +296 -0
  152. agentshore/ipc/state_writer.py +255 -0
  153. agentshore/ipc/wire.py +49 -0
  154. agentshore/keyring_child.py +131 -0
  155. agentshore/learnings.py +152 -0
  156. agentshore/logging.py +136 -0
  157. agentshore/paths.py +53 -0
  158. agentshore/platform_compat.py +38 -0
  159. agentshore/play_pacing.py +7 -0
  160. agentshore/play_rules.py +80 -0
  161. agentshore/plays/__init__.py +10 -0
  162. agentshore/plays/_publish_reconciler.py +286 -0
  163. agentshore/plays/base.py +146 -0
  164. agentshore/plays/candidates.py +1466 -0
  165. agentshore/plays/dispatch.py +464 -0
  166. agentshore/plays/executor.py +1390 -0
  167. agentshore/plays/internal/__init__.py +3 -0
  168. agentshore/plays/internal/base.py +62 -0
  169. agentshore/plays/internal/end_agent.py +116 -0
  170. agentshore/plays/internal/end_session.py +54 -0
  171. agentshore/plays/internal/instantiate_agent.py +225 -0
  172. agentshore/plays/internal/reserved_action.py +49 -0
  173. agentshore/plays/internal/take_break.py +184 -0
  174. agentshore/plays/override.py +49 -0
  175. agentshore/plays/registry.py +140 -0
  176. agentshore/plays/resolver.py +707 -0
  177. agentshore/plays/scope.py +80 -0
  178. agentshore/plays/selector.py +54 -0
  179. agentshore/plays/skill_backed/__init__.py +7 -0
  180. agentshore/plays/skill_backed/_merge_reconcile.py +149 -0
  181. agentshore/plays/skill_backed/base.py +619 -0
  182. agentshore/plays/skill_backed/calibrate_alignment.py +45 -0
  183. agentshore/plays/skill_backed/cleanup.py +45 -0
  184. agentshore/plays/skill_backed/code_review.py +238 -0
  185. agentshore/plays/skill_backed/design_audit.py +140 -0
  186. agentshore/plays/skill_backed/gates.py +339 -0
  187. agentshore/plays/skill_backed/groom_backlog.py +119 -0
  188. agentshore/plays/skill_backed/issue_pickup.py +237 -0
  189. agentshore/plays/skill_backed/merge_pr.py +256 -0
  190. agentshore/plays/skill_backed/prune.py +57 -0
  191. agentshore/plays/skill_backed/reconcile_state.py +83 -0
  192. agentshore/plays/skill_backed/refine_tasks.py +60 -0
  193. agentshore/plays/skill_backed/run_qa.py +41 -0
  194. agentshore/plays/skill_backed/seed_project.py +196 -0
  195. agentshore/plays/skill_backed/systematic_debugging.py +32 -0
  196. agentshore/plays/skill_backed/unblock_pr.py +165 -0
  197. agentshore/plays/skill_backed/write_plan.py +33 -0
  198. agentshore/power.py +246 -0
  199. agentshore/pr_state.py +123 -0
  200. agentshore/reports/__init__.py +7 -0
  201. agentshore/reports/_aggregations.py +940 -0
  202. agentshore/reports/_loop_incidents.py +142 -0
  203. agentshore/reports/_repo_url.py +66 -0
  204. agentshore/reports/collector.py +279 -0
  205. agentshore/reports/generator.py +159 -0
  206. agentshore/reports/static/__init__.py +1 -0
  207. agentshore/reports/static/chart.min.js +20 -0
  208. agentshore/reports/templates/archive_comparison.html.j2 +238 -0
  209. agentshore/reports/templates/base.html.j2 +102 -0
  210. agentshore/reports/templates/components/charts.html.j2 +88 -0
  211. agentshore/reports/templates/components/tables.html.j2 +195 -0
  212. agentshore/reports/templates/end_session_report.html.j2 +742 -0
  213. agentshore/reports/templates/progress_report.html.j2 +79 -0
  214. agentshore/reports/templates/session_summary.html.j2 +375 -0
  215. agentshore/reports/types.py +297 -0
  216. agentshore/result_parser.py +315 -0
  217. agentshore/rl/__init__.py +58 -0
  218. agentshore/rl/action_space.py +30 -0
  219. agentshore/rl/checkpoint_store.py +255 -0
  220. agentshore/rl/cold_start.py +134 -0
  221. agentshore/rl/config_head.py +65 -0
  222. agentshore/rl/constants.py +10 -0
  223. agentshore/rl/eligibility.py +1052 -0
  224. agentshore/rl/experience.py +198 -0
  225. agentshore/rl/mask.py +639 -0
  226. agentshore/rl/mask_reason.py +129 -0
  227. agentshore/rl/metrics.py +494 -0
  228. agentshore/rl/observation.py +535 -0
  229. agentshore/rl/policy.py +291 -0
  230. agentshore/rl/replay.py +104 -0
  231. agentshore/rl/reward.py +394 -0
  232. agentshore/rl/selector.py +1089 -0
  233. agentshore/rl/training.py +242 -0
  234. agentshore/seed_input.py +122 -0
  235. agentshore/session/__init__.py +3 -0
  236. agentshore/session/bootstrap.py +559 -0
  237. agentshore/session_path.py +883 -0
  238. agentshore/sidecar/__init__.py +7 -0
  239. agentshore/sidecar/__main__.py +7 -0
  240. agentshore/sidecar/agent_auth.py +126 -0
  241. agentshore/sidecar/agents.py +277 -0
  242. agentshore/sidecar/archive_rpc.py +225 -0
  243. agentshore/sidecar/build_id.py +58 -0
  244. agentshore/sidecar/config.py +91 -0
  245. agentshore/sidecar/embedded_bridge.py +133 -0
  246. agentshore/sidecar/esr.py +56 -0
  247. agentshore/sidecar/handshake.py +97 -0
  248. agentshore/sidecar/identities.py +577 -0
  249. agentshore/sidecar/identity_config.py +319 -0
  250. agentshore/sidecar/notification_emitters.py +71 -0
  251. agentshore/sidecar/project.py +796 -0
  252. agentshore/sidecar/recents.py +171 -0
  253. agentshore/sidecar/server.py +1664 -0
  254. agentshore/sidecar/session_lifecycle.py +894 -0
  255. agentshore/sidecar/yaml_edits.py +119 -0
  256. agentshore/skills/__init__.py +231 -0
  257. agentshore/skills/templates/agentshore-calibrate-alignment/SKILL.md +58 -0
  258. agentshore/skills/templates/agentshore-cleanup/SKILL.md +56 -0
  259. agentshore/skills/templates/agentshore-code-review/SKILL.md +85 -0
  260. agentshore/skills/templates/agentshore-design-audit/SKILL.md +88 -0
  261. agentshore/skills/templates/agentshore-groom-backlog/SKILL.md +117 -0
  262. agentshore/skills/templates/agentshore-issue-pickup/SKILL.md +87 -0
  263. agentshore/skills/templates/agentshore-merge-pr/SKILL.md +36 -0
  264. agentshore/skills/templates/agentshore-prune/SKILL.md +57 -0
  265. agentshore/skills/templates/agentshore-reconcile-state/SKILL.md +97 -0
  266. agentshore/skills/templates/agentshore-refine-tasks/SKILL.md +93 -0
  267. agentshore/skills/templates/agentshore-run-qa/SKILL.md +82 -0
  268. agentshore/skills/templates/agentshore-seed-project/SKILL.md +111 -0
  269. agentshore/skills/templates/agentshore-systematic-debugging/SKILL.md +43 -0
  270. agentshore/skills/templates/agentshore-unblock-pr/SKILL.md +72 -0
  271. agentshore/skills/templates/agentshore-write-plan/SKILL.md +95 -0
  272. agentshore/state.py +915 -0
  273. agentshore/subprocess_env.py +369 -0
  274. agentshore/timelapse/__init__.py +192 -0
  275. agentshore/timelapse/setup.py +487 -0
  276. agentshore/ui/__init__.py +8 -0
  277. agentshore/ui/agentshore.tcss +114 -0
  278. agentshore/ui/alignment_levels.py +14 -0
  279. agentshore/ui/app.py +461 -0
  280. agentshore/ui/format.py +10 -0
  281. agentshore/ui/play_labels.py +40 -0
  282. agentshore/ui/provider.py +67 -0
  283. agentshore/ui/screens/__init__.py +21 -0
  284. agentshore/ui/screens/agent_detail.py +85 -0
  285. agentshore/ui/screens/dashboard.py +191 -0
  286. agentshore/ui/screens/escalation.py +88 -0
  287. agentshore/ui/screens/goals.py +59 -0
  288. agentshore/ui/screens/help.py +42 -0
  289. agentshore/ui/screens/issues.py +114 -0
  290. agentshore/ui/screens/shutdown.py +161 -0
  291. agentshore/ui/screens/startup.py +105 -0
  292. agentshore/ui/widgets/__init__.py +3 -0
  293. agentshore/ui/widgets/agent_panel.py +165 -0
  294. agentshore/ui/widgets/alert_bar.py +44 -0
  295. agentshore/ui/widgets/alignment.py +67 -0
  296. agentshore/ui/widgets/budget.py +83 -0
  297. agentshore/ui/widgets/play_history.py +93 -0
  298. agentshore/ui/widgets/rl_state.py +63 -0
  299. agentshore/ui/widgets/work_queue.py +54 -0
  300. agentshore/utils.py +10 -0
  301. agentshore-0.3.2.dist-info/METADATA +125 -0
  302. agentshore-0.3.2.dist-info/RECORD +305 -0
  303. agentshore-0.3.2.dist-info/WHEEL +4 -0
  304. agentshore-0.3.2.dist-info/entry_points.txt +2 -0
  305. agentshore-0.3.2.dist-info/licenses/LICENSE +21 -0
agentshore/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ """AgentShore — RL-based multi-agent coding orchestrator."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from importlib.metadata import PackageNotFoundError, version
6
+
7
+ try:
8
+ __version__ = version("agentshore")
9
+ except PackageNotFoundError:
10
+ __version__ = "0.0.0"
agentshore/__main__.py ADDED
@@ -0,0 +1,7 @@
1
+ """Allow running as `python -m agentshore`."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from agentshore.cli import main
6
+
7
+ main()
@@ -0,0 +1,14 @@
1
+ """Agent manager — lifecycle and CLI subprocess management."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from agentshore.agents.capabilities import AGENT_CAPABILITIES
6
+ from agentshore.agents.handle import AgentHandle, AgentInvocationResult
7
+ from agentshore.agents.manager import AgentManager
8
+
9
+ __all__ = [
10
+ "AGENT_CAPABILITIES",
11
+ "AgentHandle",
12
+ "AgentInvocationResult",
13
+ "AgentManager",
14
+ ]
@@ -0,0 +1,117 @@
1
+ """Shared JSONL / usage-accounting primitives for the CLI agent adapters.
2
+
3
+ The CLI agents (Claude Code, Codex, Gemini, Grok) all emit JSONL on stdout and
4
+ share the same token-usage bookkeeping. These primitives used to live in
5
+ ``cli_agent``; ``cli_grok`` imported them from there while ``cli_agent``
6
+ lazily imported ``cli_grok`` back — a circular edge that forced two
7
+ lazy-import wrappers (issue: TNQA finding #6). Hoisting the shared pieces into
8
+ this leaf module breaks the cycle: both adapters import from here, and neither
9
+ imports the other for these helpers.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ from dataclasses import dataclass
16
+ from typing import TYPE_CHECKING
17
+
18
+ if TYPE_CHECKING:
19
+ from collections.abc import Iterator
20
+
21
+
22
+ @dataclass(frozen=True, slots=True)
23
+ class _UsageTotals:
24
+ tokens_in: int = 0
25
+ tokens_out: int = 0
26
+ cached_tokens_in: int = 0
27
+ cache_write_tokens_in: int = 0
28
+ turn_count: int = 0
29
+ max_turn_input_tokens: int = 0
30
+
31
+
32
+ def _iter_json_events(raw: str) -> Iterator[dict[str, object]]:
33
+ """Yield each non-blank, JSON-decodable line of *raw* as a dict event.
34
+
35
+ The CLI agents all emit JSONL on stdout; this is the single scan loop they
36
+ share (skip blank lines, ``json.loads``, drop ``JSONDecodeError`` and
37
+ non-dict payloads) so the per-format parsers only express their own event
38
+ semantics.
39
+ """
40
+ for line in map(str.strip, raw.splitlines()):
41
+ if not line:
42
+ continue
43
+ try:
44
+ event = json.loads(line)
45
+ except json.JSONDecodeError:
46
+ continue
47
+ if isinstance(event, dict):
48
+ yield event
49
+
50
+
51
def _usage_totals_from_dict(
    usage: dict[str, object], *, input_includes_cache: bool
) -> _UsageTotals:
    """Convert one usage payload into a ``_UsageTotals``.

    Source selection:

    * If *usage* has a dict under ``"total_token_usage"``, the counters are
      read from that dict.
    * Otherwise, if it has a dict under ``"last_token_usage"``, the counters
      are read from that dict.
    * Otherwise the counters are read from *usage* itself.

    In the first two cases the input count is always treated as already
    including cache tokens, regardless of *input_includes_cache*.

    Args:
        usage: Decoded usage mapping from an agent event.
        input_includes_cache: Whether ``input_tokens`` already contains the
            cache-read and cache-write tokens. When false, both are added to
            ``tokens_in``.

    Returns:
        Totals with ``turn_count`` left at its default. ``tokens_out`` falls
        back to ``reasoning_output_tokens`` when ``output_tokens`` is not
        positive. ``max_turn_input_tokens`` comes from a non-empty
        ``"last_token_usage"`` dict when there is one, else equals
        ``tokens_in``.
    """
    cumulative = usage.get("total_token_usage")
    latest = usage.get("last_token_usage")
    # The per-turn dict is tracked independently of which dict supplies totals.
    per_turn: dict[str, object] | None = latest if isinstance(latest, dict) else None

    if isinstance(cumulative, dict):
        source = cumulative
        cache_inclusive = True
    elif per_turn is not None:
        source = per_turn
        cache_inclusive = True
    else:
        source = usage
        cache_inclusive = input_includes_cache

    prompt = _first_int(source, "input_tokens")
    cache_reads = _safe_int(source.get("cached_input_tokens")) + _safe_int(
        source.get("cache_read_input_tokens")
    )
    cache_writes = _first_int(source, "cache_creation_input_tokens")
    completion = _first_int(source, "output_tokens")
    reasoning = _first_int(source, "reasoning_output_tokens")

    if cache_inclusive:
        tokens_in = prompt
    else:
        tokens_in = prompt + cache_reads + cache_writes

    tokens_out = completion if completion > 0 else reasoning

    # An empty per-turn dict is treated the same as a missing one.
    if per_turn:
        largest_turn = _safe_int(per_turn.get("input_tokens"))
    else:
        largest_turn = tokens_in

    return _UsageTotals(
        tokens_in=tokens_in,
        tokens_out=tokens_out,
        cached_tokens_in=cache_reads,
        cache_write_tokens_in=cache_writes,
        max_turn_input_tokens=largest_turn,
    )
88
+
89
+
90
def _max_usage(left: _UsageTotals, right: _UsageTotals) -> _UsageTotals:
    """Return a new total holding, for every counter, the larger of the two inputs."""
    counters = (
        "tokens_in",
        "tokens_out",
        "cached_tokens_in",
        "cache_write_tokens_in",
        "turn_count",
        "max_turn_input_tokens",
    )
    peaks = {name: max(getattr(left, name), getattr(right, name)) for name in counters}
    return _UsageTotals(**peaks)
99
+
100
+
101
def _first_int(values: dict[str, object], *keys: str) -> int:
    """Return the first non-zero integer found under *keys* in *values*.

    Keys are tried in the order given; each value is coerced with
    ``_safe_int``. Missing keys and values that coerce to zero are skipped.
    Returns 0 when no key yields a non-zero integer.
    """
    coerced = (_safe_int(values.get(name)) for name in keys)
    # The generator is lazy, so coercion stops at the first non-zero hit.
    return next((number for number in coerced if number), 0)
107
+
108
+
109
+ def _safe_int(value: object) -> int:
110
+ if isinstance(value, bool):
111
+ return int(value)
112
+ if isinstance(value, int | float | str):
113
+ try:
114
+ return int(value)
115
+ except ValueError:
116
+ return 0
117
+ return 0
@@ -0,0 +1,247 @@
1
+ """Agent selection helpers — pure rule chain for the AgentManager.
2
+
3
+ Rule chain (applied in order):
4
+ 0a. Required-id pin (hard): if ``required_agent_id`` is set, narrow to that
5
+ single handle. Used by the resolver to pin code-review dispatch to a
6
+ specific agent whose GH identity has been verified upstream.
7
+ 0b. Required-type pin (hard): if ``required_agent_type`` is set (and no id pin),
8
+ narrow to that type. Used by ``instantiate_agent`` and similar
9
+ type-specific plays.
10
+ 1. Anti-confirmation bias (hard): exclude the PR author from CodeReview.
11
+ QA runs against the merged trunk and has no anti-confirmation; any
12
+ can_test agent may execute it.
13
+ 2. Exclude list (hard): drop agent types listed in ``preferences.exclude``
14
+ for this play type.
15
+ 3. Tier eligibility (hard): drop agents whose ``model_tier`` isn't in the
16
+ allowed set for this play type. Small tier is blocked from any coding
17
+ or strategic play; large tier is blocked from cheap mechanical plays
18
+ where the play is explicitly tier-limited.
19
+ 4. AntiConfirmationViolation if no candidates remain after hard filters.
20
+ 5. Branch exposure affinity (soft): promote agents with prior exposure to *branch*.
21
+ 6. Type affinity (soft): promote agents whose type matches
22
+ ``preferences.affinity`` for this play type.
23
+ 7. Tier cost (soft): prefer cheaper eligible tiers when affinity is tied.
24
+ 8. Least-busy tiebreaker: sort by ascending task history length.
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ from typing import TYPE_CHECKING
30
+
31
+ import structlog
32
+
33
+ from agentshore.agents.model_tiers import DEFAULT_MODEL_TIER
34
+ from agentshore.errors import AntiConfirmationViolation
35
+ from agentshore.identity_names import same_identity
36
+ from agentshore.state import AgentStatus, PlayType, is_agent_circuit_broken
37
+
38
+ if TYPE_CHECKING:
39
+ from agentshore.agents.handle import AgentHandle
40
+ from agentshore.config import AgentPreferencesConfig
41
+
42
+ _logger = structlog.get_logger(__name__)
43
+
44
+ _REVIEW_PLAYS: frozenset[PlayType] = frozenset({PlayType.CODE_REVIEW})
45
+
46
+ # Per-play tier eligibility. Plays not listed here accept any tier.
47
+ # Four bands:
48
+ # - Cheap mechanical work (small ∪ medium): browser checks and
49
+ # merging already-approved PRs.
50
+ # - Universal (small ∪ medium ∪ large): cleanup — it's the bootstrap
51
+ # first-play when the backlog is large, and at that moment only the
52
+ # large agent has spawned. Excluding large here used to cause the
53
+ # bootstrap-cleanup to get skip:staffing'd on every fresh open-stocks-
54
+ # mcp session (seen 2026-05-22). Per the broad-bands philosophy let
55
+ # PPO learn tier affinity rather than pre-committing.
56
+ # - Coding & strategic work (medium ∪ large): anything that writes code,
57
+ # restructures local work, or interprets test failures. Small is too
58
+ # risky for downstream cost.
59
+ # - Heavyweight strategic / validation (large only): seed/design audits,
60
+ # final QA, and global calibration where medium's judgement isn't trusted
61
+ # to set or certify the trajectory.
62
+ # Medium is the universal fallback for the first three bands.
63
+ _PLAY_ALLOWED_TIERS: dict[PlayType, frozenset[str]] = {
64
+ PlayType.CLEANUP: frozenset({"small", "medium", "large"}),
65
+ PlayType.MERGE_PR: frozenset({"small", "medium"}),
66
+ # Coding & strategic — medium ∪ large, except RUN_QA and WRITE_IMPLEMENTATION_PLAN (large only).
67
+ PlayType.ISSUE_PICKUP: frozenset({"medium", "large"}),
68
+ PlayType.UNBLOCK_PR: frozenset({"large", "medium"}),
69
+ PlayType.CODE_REVIEW: frozenset({"medium", "large"}),
70
+ PlayType.REFINE_TASK_BREAKDOWN: frozenset({"medium", "large"}),
71
+ PlayType.RUN_QA: frozenset({"large"}),
72
+ PlayType.WRITE_IMPLEMENTATION_PLAN: frozenset({"large"}),
73
+ PlayType.SYSTEMATIC_DEBUGGING: frozenset({"medium", "large"}),
74
+ # Large only — beads/design-doc audits and final validation (GROOM_BACKLOG also accepts medium).
75
+ PlayType.SEED_PROJECT: frozenset({"large"}),
76
+ PlayType.DESIGN_AUDIT: frozenset({"large"}),
77
+ PlayType.GROOM_BACKLOG: frozenset({"medium", "large"}),
78
+ PlayType.CALIBRATE_ALIGNMENT: frozenset({"large"}),
79
+ # RECONCILE_STATE — log-parse + targeted local remediation. Doesn't need
80
+ # large-tier reasoning; medium suffices and is cheaper when it fires.
81
+ PlayType.RECONCILE_STATE: frozenset({"medium", "large"}),
82
+ }
83
+
84
+
85
def allowed_tiers_for(play_type: PlayType) -> frozenset[str] | None:
    """Look up the model tiers permitted to run *play_type*.

    Returns the tier set registered in ``_PLAY_ALLOWED_TIERS``, or ``None``
    when the play has no entry there (meaning any tier may run it).
    """
    if play_type in _PLAY_ALLOWED_TIERS:
        return _PLAY_ALLOWED_TIERS[play_type]
    return None
88
+
89
+
90
def select_agent_for(
    play_type: PlayType,
    handles: dict[str, AgentHandle],
    *,
    pr_github_author: str | None = None,
    branch_exposure: dict[str, str] | None = None,
    preferences: AgentPreferencesConfig | None = None,
    branch: str | None = None,
    required_agent_type: str | None = None,
    required_agent_id: str | None = None,
) -> AgentHandle:
    """Pick the most suitable IDLE handle for *play_type*.

    Hard filters run first (pin, anti-confirmation, exclude list, tier
    eligibility). Survivors are then ranked by soft preferences, and the
    best-ranked handle is returned; ties keep the original iteration order of
    *handles*.

    Args:
        play_type: The play being staffed.
        handles: All known agent handles; only those with IDLE status are
            considered.
        pr_github_author: GitHub identity of the PR author. Used only for
            review plays; ``None`` disables the anti-confirmation filter.
        branch_exposure: Mapping of branch name to the id of the agent with
            prior exposure to it.
        preferences: Optional config supplying the per-play ``exclude`` lists
            and ``affinity`` type.
        branch: Branch the play targets, used for the exposure preference.
        required_agent_type: When set (and no id pin is given), only handles
            of this agent type are considered.
        required_agent_id: When set, only the handle with this id is
            considered; takes precedence over *required_agent_type*.

    Raises:
        AntiConfirmationViolation: When there are no IDLE handles, when the
            pinned id or required type has no IDLE handle, or when the hard
            filters eliminate every candidate. Each case carries its own
            message.
    """
    exposure_by_branch = branch_exposure or {}

    def _split(
        group: list[AgentHandle], keep: Callable[[AgentHandle], bool]
    ) -> tuple[list[AgentHandle], list[str]]:
        """Partition *group* into kept handles and the ids of dropped ones."""
        kept: list[AgentHandle] = []
        dropped_ids: list[str] = []
        for member in group:
            if keep(member):
                kept.append(member)
            else:
                dropped_ids.append(member.agent_id)
        return kept, dropped_ids

    # -- Step 0: only IDLE handles are selectable -----------------------------
    pool: list[AgentHandle] = [h for h in handles.values() if h.status == AgentStatus.IDLE]
    if not pool:
        raise AntiConfirmationViolation("No IDLE agents available for selection")

    if required_agent_id is not None:
        # -- Step 0a: id pin. If the pinned handle is not IDLE we raise rather
        # than fall through to a different agent, which could break the
        # identity invariant the pin was chosen to satisfy.
        pool = [h for h in pool if h.agent_id == required_agent_id]
        if not pool:
            raise AntiConfirmationViolation(f"Pinned agent {required_agent_id!r} is no longer IDLE")
    elif required_agent_type is not None:
        # -- Step 0b: type pin, only consulted when no id pin is given.
        pool = [h for h in pool if h.agent_type.value == required_agent_type]
        if not pool:
            raise AntiConfirmationViolation(
                f"No IDLE agents of required type {required_agent_type!r} available"
            )

    candidate_count_in = len(pool)

    # Ids removed by each hard filter. Key order mirrors filter precedence so
    # one log line shows the dominant blocker.
    eliminated: dict[str, list[str]] = {
        "anti_confirmation": [],
        "exclude": [],
        "tier": [],
    }

    # -- Step 1: anti-confirmation (hard) -------------------------------------
    # Review plays only: drop any agent whose GitHub identity matches the PR
    # author. With an unknown author (None) every candidate passes this step.
    author_ids: set[str] = set()
    if play_type in _REVIEW_PLAYS and pr_github_author is not None:
        author_ids = {
            h.agent_id for h in pool if same_identity(h.github_identity, pr_github_author)
        }
    pool, dropped = _split(pool, lambda h: h.agent_id not in author_ids)
    eliminated["anti_confirmation"].extend(dropped)

    # -- Step 2: exclude list (hard) ------------------------------------------
    banned_types: set[str] = set()
    if preferences is not None:
        banned_types.update(preferences.exclude.get(play_type.value, []))
    if banned_types:
        pool, dropped = _split(pool, lambda h: h.agent_type.value not in banned_types)
        eliminated["exclude"].extend(dropped)

    # -- Step 3: tier eligibility (hard) --------------------------------------
    permitted_tiers = _PLAY_ALLOWED_TIERS.get(play_type)
    if permitted_tiers is not None:
        pool, dropped = _split(
            pool, lambda h: (h.model_tier or DEFAULT_MODEL_TIER) in permitted_tiers
        )
        eliminated["tier"].extend(dropped)

    # -- Step 4: nothing survived the hard filters ----------------------------
    if not pool:
        _logger.warning(
            "agent_selection_blocked",
            play_type=play_type.value,
            eliminated=eliminated,
            candidate_count_in=candidate_count_in,
        )
        raise AntiConfirmationViolation(
            f"All agents blocked for {play_type.value!r} — "
            "anti-confirmation, exclude, or tier-eligibility rules eliminated all candidates"
        )

    # -- Step 5: soft ranking (lower tuple = more preferred) ------------------
    preferred_type: str | None = (
        preferences.affinity.get(play_type.value) if preferences is not None else None
    )

    exposed_ids: set[str] = set()
    if branch is not None:
        exposed_agent = exposure_by_branch.get(branch)
        if exposed_agent:
            exposed_ids.add(exposed_agent)

    tier_rank = {"small": 0, "medium": 1, "large": 2}

    def _rank(h: AgentHandle) -> tuple[int, int, int, int, int]:
        history = h.task_history
        succeeded = sum(1 for task in history if task.success)
        failed = len(history) - succeeded

        # Circuit breaker: push a circuit-broken agent behind every healthy
        # peer. Soft on purpose — if every candidate is broken, one is still
        # picked rather than wedging the selection.
        if is_agent_circuit_broken(
            tasks_completed=succeeded,
            tasks_failed=failed,
            timeout_count=h.timeout_count,
            consecutive_timeouts=h.consecutive_timeouts,
        ):
            broken_rank = 1
        else:
            broken_rank = 0

        # Prior exposure to the target branch wins next.
        exposure_rank = 0 if h.agent_id in exposed_ids else 1

        # Then a match with the configured affinity type for this play.
        if preferred_type and h.agent_type.value == preferred_type:
            affinity_rank = 0
        else:
            affinity_rank = 1

        # Then the cheaper tier, keeping larger agents free for plays that
        # require them. Unknown tiers rank as medium.
        cost_rank = tier_rank.get(h.model_tier or DEFAULT_MODEL_TIER, tier_rank["medium"])

        # Finally the agent with the shortest task history.
        load_rank = len(history)
        return (broken_rank, exposure_rank, affinity_rank, cost_rank, load_rank)

    # min() returns the first minimal element, matching a stable sort's head.
    return min(pool, key=_rank)
@@ -0,0 +1,241 @@
1
+ """Pre-launch CLI-agent backend auth probing.
2
+
3
+ ``preflight_identities`` validates the *GitHub* identity tokens a session will
4
+ commit/merge with. It does NOT validate the *backend* auth each CLI agent uses
5
+ to reach its model provider — e.g. the Codex CLI's cached ``chatgpt.com``
6
+ session token, which carries a TTL and expires mid-run. When it expires the
7
+ Codex CLI prints ``failed to renew cache TTL`` / ``failed to refresh available
8
+ models`` to stderr and then hangs reading from stdin, so every dispatch runs to
9
+ the full ``stream_idle_timeout`` before being killed — observed burning 16
10
+ plays in a single session.
11
+
12
+ This module is the single source of truth for "is agent <type>'s backend auth
13
+ currently valid?", shared by three call sites so a green badge on the desktop
14
+ setup screen provably means the launch gate will pass:
15
+
16
+ * the CLI launch gate (``preflight_cli_agent_auth`` in ``session/bootstrap.py``),
17
+ * the desktop ``session.start`` gate (a phase in ``sidecar/session_lifecycle``),
18
+ * the desktop agents/identities setup screen (``agents.check_auth`` RPC).
19
+
20
+ The probe is intentionally conservative: only agent types with a reliable,
21
+ non-mutating auth-status command are probed; everything else returns
22
+ ``UNPROBEABLE`` and never blocks a launch, so this can never introduce a
23
+ false-negative startup failure.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import os
29
+ import shutil
30
+ import subprocess
31
+ from dataclasses import dataclass
32
+ from typing import TYPE_CHECKING
33
+
34
+ from agentshore import subprocess_env
35
+ from agentshore.state import CLI_AGENT_TYPES, AgentType
36
+
37
+ if TYPE_CHECKING:
38
+ from agentshore.config.models import AgentConfig, RuntimeConfig
39
+
40
# Status vocabulary shared with the frontend. Both the desktop setup screen and
# the launch gate consume these exact strings, so each status corresponds 1:1
# to a badge.
AUTH_OK = "ok"
AUTH_EXPIRED = "expired"
AUTH_TIMEOUT = "timeout"
AUTH_ERROR = "error"
AUTH_UNPROBEABLE = "unprobeable"

# The only statuses that gate a launch. ``error`` (binary missing, or an
# unexpected non-zero exit with no auth marker) and ``timeout`` are reported
# but deliberately NOT blocking: a transient probe hiccup must never strand an
# otherwise-fine session, and the runtime auth-suppression backstop
# (ErrorClass.AUTH parking) catches a genuine failure that slips through.
_BLOCKING_STATUSES = frozenset((AUTH_EXPIRED,))

# Default probe timeout in seconds. Auth-status is a local credential read, so
# 10s is ample while keeping the setup screen / launch gate responsive.
DEFAULT_PROBE_TIMEOUT_S = 10.0

# Auth-status arguments per agent type, appended to the resolved binary. Only
# the Codex CLI exposes a reliable, non-interactive, non-mutating status verb
# today; every other type falls through to UNPROBEABLE until a trustworthy
# command is confirmed (a wrong probe that blocks launch is worse than none).
_PROBE_ARGV: dict[AgentType, tuple[str, ...]] = {
    AgentType.CODEX: ("login", "status"),
}

# Executable name looked up on PATH when the agent config supplies no
# ``binary`` override.
_DEFAULT_BINARY: dict[AgentType, str] = {
    AgentType.CLAUDE_CODE: "claude",
    AgentType.CODEX: "codex",
    AgentType.GEMINI: "gemini",
    AgentType.GROK: "grok",
}

# Lower-case substrings that mean the backend is NOT authenticated or the
# cached session is dead. They are searched for in the lower-cased
# stdout+stderr of the probe. The Codex TTL-expiry signatures are included so
# the vocabulary that classifies a mid-run hang (ErrorClass.AUTH) also
# classifies a pre-launch probe.
_NOT_AUTHED_MARKERS: tuple[str, ...] = (
    "not logged in",
    "not authenticated",
    "logged out",
    "no credentials",
    "please run",
    "run `codex login`",
    "run 'codex login'",
    "failed to renew cache ttl",
    "failed to refresh available models",
)
89
+
90
+
91
@dataclass(frozen=True)
class AuthProbeResult:
    """Immutable record of one backend-auth probe for a single agent type."""

    # Agent type the probe was run for.
    agent_type: AgentType
    # Probe outcome; expected to be one of the module's ``AUTH_*`` strings.
    status: str
    # Short human-readable explanation accompanying ``status``.
    detail: str

    @property
    def ok(self) -> bool:
        """Whether the result is non-failing: authenticated, or not probeable."""
        non_failing = (AUTH_OK, AUTH_UNPROBEABLE)
        return self.status in non_failing

    @property
    def blocks_launch(self) -> bool:
        """Whether the status is one of the definitive, launch-gating failures."""
        gating = _BLOCKING_STATUSES
        return self.status in gating
108
+
109
+
110
+ def _first_meaningful_line(text: str) -> str:
111
+ for line in text.splitlines():
112
+ stripped = line.strip()
113
+ if stripped:
114
+ return stripped[:200]
115
+ return ""
116
+
117
+
118
def probe_cli_auth(
    agent_type: AgentType,
    env: dict[str, str] | None = None,
    *,
    binary: str | None = None,
    timeout: float = DEFAULT_PROBE_TIMEOUT_S,
) -> AuthProbeResult:
    """Probe one CLI agent type's backend auth via its status command.

    Runs the agent type's auth-status command (from ``_PROBE_ARGV``) as a
    short subprocess under the ambient environment overlaid with *env*.
    *binary* overrides the default executable name for the type; *timeout* is
    the number of seconds to wait for the command before killing it.

    Every failure mode handled here maps to an :class:`AuthProbeResult`
    instead of an exception:

    * no probe command for the type -> ``AUTH_UNPROBEABLE``
    * executable not on PATH, spawn failure, or a non-zero exit without an
      auth marker -> ``AUTH_ERROR``
    * no exit within *timeout* -> ``AUTH_TIMEOUT``
    * a ``_NOT_AUTHED_MARKERS`` substring in the output -> ``AUTH_EXPIRED``
    * otherwise -> ``AUTH_OK``

    Blocking in nature (spawns with ``subprocess.Popen`` and waits in
    ``communicate``); async callers should wrap it in ``asyncio.to_thread``.
    """
    argv_tail = _PROBE_ARGV.get(agent_type)
    if argv_tail is None:
        return AuthProbeResult(
            agent_type, AUTH_UNPROBEABLE, "no auth-status probe for this agent type"
        )

    exe = binary or _DEFAULT_BINARY.get(agent_type, agent_type.value)
    resolved = shutil.which(exe)
    if resolved is None:
        return AuthProbeResult(agent_type, AUTH_ERROR, f"{exe!r} not found on PATH")

    full_env = {**os.environ, **(env or {})}
    try:
        # Popen (not subprocess.run) so a timeout can tree-kill: the probed CLIs
        # (codex) are node shims that spawn children; subprocess.run's own
        # timeout kill reaps only the direct child and leaves the node subtree
        # alive. CREATE_NO_WINDOW + new process group (Windows; 0 elsewhere)
        # suppresses the console flash / AV window-hooking latency and roots
        # the child in a killable group, matching the dispatch path in
        # cli_agent and the hardened runner in command.py.
        proc = subprocess.Popen(  # noqa: S603 — fixed argv, resolved binary
            [resolved, *argv_tail],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            # Pin stdin (never inherit the parent's): the desktop sidecar's
            # stdin is the live Tauri JSON-RPC pipe, and the very CLIs we probe
            # (codex) wedge on a contended/empty stdin. Enforced by
            # tests/test_subprocess_stdin_guard.py.
            stdin=subprocess.DEVNULL,
            text=True,
            # Undecodable bytes in the CLI's output must not surface as a
            # UnicodeDecodeError from communicate(); the output is only
            # scanned for ASCII markers, so lossy decoding is sufficient.
            errors="replace",
            env=full_env,
            creationflags=subprocess_env.no_window_creationflags(),
        )
    except (OSError, ValueError) as exc:
        # Popen raises ValueError (not OSError) for invalid arguments, e.g. an
        # embedded NUL in an environment value; report it like any other
        # spawn failure instead of letting it propagate.
        return AuthProbeResult(agent_type, AUTH_ERROR, str(exc)[:200])

    try:
        stdout, stderr = proc.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        # Kill the whole tree (codex → node), not just the direct child, so
        # nothing lingers past the probe.
        if proc.pid is not None:
            subprocess_env.kill_tree_sync(proc.pid)
        proc.kill()
        # Drain the pipes and reap the child after the kill.
        proc.communicate()
        return AuthProbeResult(agent_type, AUTH_TIMEOUT, f"auth probe timed out after {timeout:g}s")

    stdout = stdout or ""
    stderr = stderr or ""
    # Markers are lower-case, so lower-casing the output makes the match
    # case-insensitive. The marker check runs before the exit-code check so a
    # recognisable auth failure is EXPIRED regardless of exit status.
    combined = f"{stdout}\n{stderr}".lower()
    if any(marker in combined for marker in _NOT_AUTHED_MARKERS):
        detail = _first_meaningful_line(stderr) or _first_meaningful_line(stdout)
        return AuthProbeResult(
            agent_type, AUTH_EXPIRED, detail or "backend session not authenticated"
        )
    if proc.returncode != 0:
        detail = _first_meaningful_line(stderr) or _first_meaningful_line(stdout)
        return AuthProbeResult(
            agent_type,
            AUTH_ERROR,
            f"auth probe exited {proc.returncode}: {detail}"
            if detail
            else f"auth probe exited {proc.returncode}",
        )
    return AuthProbeResult(agent_type, AUTH_OK, "authenticated")
197
+
198
+
199
def configured_cli_agent_types(cfg: RuntimeConfig) -> list[tuple[AgentType, AgentConfig]]:
    """List the enabled CLI agent types in *cfg*, each paired with its config.

    Keys of ``cfg.agents`` that are not valid :class:`AgentType` values (for
    example API agent keys such as ``api_gpt``) are ignored, as are types
    outside ``CLI_AGENT_TYPES`` and agents whose config is not enabled. Each
    type appears at most once, in ``cfg.agents`` iteration order — a backend
    session token is shared across instances of a type, so one probe per type
    is enough.
    """
    # Insertion-ordered mapping: deduplicates by type while keeping the first
    # qualifying config and the original ordering.
    chosen: dict[AgentType, AgentConfig] = {}
    for key, entry in cfg.agents.items():
        try:
            kind = AgentType(key)
        except ValueError:
            # Key does not name an AgentType; nothing to probe for it.
            continue
        if kind in CLI_AGENT_TYPES and entry.enabled and kind not in chosen:
            chosen[kind] = entry
    return list(chosen.items())
221
+
222
+
223
def probe_configured_cli_auth(cfg: RuntimeConfig) -> list[AuthProbeResult]:
    """Run the backend-auth probe for every enabled CLI agent type in *cfg*.

    Each type is probed with its configured ``binary`` override and under the
    env overlay returned by ``resolve_identity_env`` for that agent (for parity
    with how the Agent Manager spawns it). Results are returned in the order
    produced by ``configured_cli_agent_types``. Shared by the CLI and desktop
    launch gates.
    """
    # Imported at call time rather than at module top — presumably to avoid an
    # import cycle with the identity module; confirm before hoisting.
    from agentshore.agents.identity import resolve_identity_env

    def _identity_overlay(agent_cfg: AgentConfig):
        """Return the identity env overlay for *agent_cfg*, or ``{}`` on failure."""
        try:
            return resolve_identity_env(cfg, agent_cfg)
        except Exception:
            # Identity resolution failures are a GitHub-token concern surfaced
            # by preflight_identities; they must not block the backend probe.
            return {}

    return [
        probe_cli_auth(kind, _identity_overlay(entry), binary=entry.binary)
        for kind, entry in configured_cli_agent_types(cfg)
    ]