@team-agent/installer 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/team_agent/abnormal_track.py +253 -0
- package/src/team_agent/cli/commands.py +17 -1
- package/src/team_agent/cli/parser.py +2 -2
- package/src/team_agent/compiler.py +1 -1
- package/src/team_agent/coordinator/lifecycle.py +20 -2
- package/src/team_agent/display/__init__.py +31 -0
- package/src/team_agent/display/adaptive.py +425 -0
- package/src/team_agent/display/backend.py +46 -0
- package/src/team_agent/display/close.py +6 -0
- package/src/team_agent/display/rebuild.py +102 -0
- package/src/team_agent/display/tiling.py +156 -0
- package/src/team_agent/display/worker_window.py +4 -0
- package/src/team_agent/display/workspace.py +36 -127
- package/src/team_agent/idle_predicate.py +200 -0
- package/src/team_agent/idle_takeover.py +59 -0
- package/src/team_agent/idle_takeover_wiring.py +111 -0
- package/src/team_agent/launch/core.py +13 -4
- package/src/team_agent/leader/__init__.py +444 -61
- package/src/team_agent/message_store/agent_health.py +6 -2
- package/src/team_agent/message_store/core.py +51 -18
- package/src/team_agent/message_store/leader_notification_log.py +63 -38
- package/src/team_agent/message_store/result_watchers.py +17 -11
- package/src/team_agent/message_store/schema.py +19 -2
- package/src/team_agent/message_store/schema_migration.py +386 -0
- package/src/team_agent/messaging/delivery.py +45 -2
- package/src/team_agent/messaging/leader_panes.py +115 -21
- package/src/team_agent/messaging/send.py +33 -0
- package/src/team_agent/messaging/tmux_io.py +49 -10
- package/src/team_agent/messaging/trust_auto_answer.py +11 -3
- package/src/team_agent/provider_state/README.md +78 -0
- package/src/team_agent/provider_state/__init__.py +86 -0
- package/src/team_agent/provider_state/claude.py +86 -0
- package/src/team_agent/provider_state/codex.py +84 -0
- package/src/team_agent/provider_state/common.py +207 -0
- package/src/team_agent/provider_state/registry.py +118 -0
- package/src/team_agent/restart/orchestration.py +9 -9
- package/src/team_agent/runtime.py +62 -12
- package/src/team_agent/spec.py +4 -3
- package/src/team_agent/wake.py +58 -0
|
@@ -32,6 +32,49 @@ def _tmux_inject_text(
|
|
|
32
32
|
*,
|
|
33
33
|
bypass_non_input_gate: bool = False,
|
|
34
34
|
) -> dict[str, Any]:
|
|
35
|
+
# Round-5 follow-up: empty-text Enter path (used by trust auto-answer to
|
|
36
|
+
# accept Codex's default `1. Yes, continue` choice with a plain Enter).
|
|
37
|
+
# tmux rejects set-buffer / paste-buffer of an empty string, so the
|
|
38
|
+
# buffer-paste route would leave the trust prompt stuck. Issue
|
|
39
|
+
# `send-keys -t <target> <submit_key>` directly and bypass the buffer
|
|
40
|
+
# path entirely.
|
|
41
|
+
if text == "":
|
|
42
|
+
proc = run_cmd(["tmux", "send-keys", "-t", target, submit_key], timeout=10)
|
|
43
|
+
if proc.returncode != 0:
|
|
44
|
+
return {
|
|
45
|
+
"ok": False,
|
|
46
|
+
"stage": "send-keys",
|
|
47
|
+
"error": proc.stderr.strip() or "tmux send-keys failed",
|
|
48
|
+
"attempts": [
|
|
49
|
+
{
|
|
50
|
+
"attempt": 1,
|
|
51
|
+
"submitted": False,
|
|
52
|
+
"verification": "send_keys_failed",
|
|
53
|
+
"submit_key": submit_key,
|
|
54
|
+
}
|
|
55
|
+
],
|
|
56
|
+
"verification": "send_keys_failed",
|
|
57
|
+
}
|
|
58
|
+
return {
|
|
59
|
+
"ok": True,
|
|
60
|
+
"stage": "submitted",
|
|
61
|
+
"visible": True,
|
|
62
|
+
"submitted": True,
|
|
63
|
+
"verification": "empty_text_send_keys",
|
|
64
|
+
"submit_verification": f"{submit_key}_sent_direct",
|
|
65
|
+
"turn_verification": "not_required",
|
|
66
|
+
"attempts": [
|
|
67
|
+
{
|
|
68
|
+
"attempt": 1,
|
|
69
|
+
"submitted": True,
|
|
70
|
+
"verification": "empty_text_send_keys",
|
|
71
|
+
"submit_key": submit_key,
|
|
72
|
+
}
|
|
73
|
+
],
|
|
74
|
+
"submit_attempts": [
|
|
75
|
+
{"attempt": 1, "submitted": True, "verification": "send_keys"}
|
|
76
|
+
],
|
|
77
|
+
}
|
|
35
78
|
token_match = re.search(r"\[team-agent-token:([^\]]+)\]", text)
|
|
36
79
|
token = token_match.group(1) if token_match else ""
|
|
37
80
|
attempt_log: list[dict[str, Any]] = []
|
|
@@ -134,6 +177,11 @@ def _tmux_inject_text(
|
|
|
134
177
|
"submit_attempts": submit.get("attempts"),
|
|
135
178
|
}
|
|
136
179
|
submit_verification = _leader_submit_verification(submit.get("verification"), verification, submit_key)
|
|
180
|
+
# Gap 42: paste+submit success is authoritative for delivery. The post-submit
|
|
181
|
+
# turn-boundary probe is observation metadata only, never a delivery gate — a
|
|
182
|
+
# busy / compacting recipient that has not yet shown a new prompt marker is
|
|
183
|
+
# still a successful delivery. Real paste/submit failures are caught and
|
|
184
|
+
# returned above; this point is only reached after submit reported ok.
|
|
137
185
|
turn_visible, turn_verification, turn_capture = _wait_for_leader_new_turn(
|
|
138
186
|
target,
|
|
139
187
|
text,
|
|
@@ -142,16 +190,7 @@ def _tmux_inject_text(
|
|
|
142
190
|
timeout=2.0,
|
|
143
191
|
)
|
|
144
192
|
if not turn_visible:
|
|
145
|
-
|
|
146
|
-
"ok": False,
|
|
147
|
-
"stage": "turn-boundary-verification",
|
|
148
|
-
"error": f"leader turn boundary not verified: {turn_verification}",
|
|
149
|
-
"attempts": attempt_log,
|
|
150
|
-
"verification": verification,
|
|
151
|
-
"submit_verification": submit_verification,
|
|
152
|
-
"turn_verification": turn_verification,
|
|
153
|
-
"submit_attempts": submit.get("attempts"),
|
|
154
|
-
}
|
|
193
|
+
turn_verification = "not_yet_observed"
|
|
155
194
|
return {
|
|
156
195
|
"ok": True,
|
|
157
196
|
"stage": "submitted",
|
|
@@ -18,14 +18,22 @@ def retry_injection_after_trust_auto_answer(
|
|
|
18
18
|
buffer_name: str,
|
|
19
19
|
provider: str,
|
|
20
20
|
) -> dict[str, Any]:
|
|
21
|
-
from team_agent.messaging.delivery import _wait_for_trust_prompt_dismissal
|
|
21
|
+
from team_agent.messaging.delivery import _tmux_pane_width, _wait_for_trust_prompt_dismissal
|
|
22
22
|
from team_agent.messaging.leader_panes import attempt_trust_auto_answer
|
|
23
|
+
pane_target = injection.get("pane_id") or target
|
|
24
|
+
# Live wiring: query tmux pane width now and pass via state["pane_width"]
|
|
25
|
+
# (symmetric with _deliver_pending_message). Fail-safe on query failure —
|
|
26
|
+
# leave pane_width absent so the matcher falls back to exact equality.
|
|
27
|
+
width_query = _tmux_pane_width(pane_target)
|
|
28
|
+
trust_state = dict(state) if isinstance(state, dict) else {}
|
|
29
|
+
if width_query.get("ok"):
|
|
30
|
+
trust_state["pane_width"] = width_query["pane_width"]
|
|
23
31
|
answer = attempt_trust_auto_answer(
|
|
24
32
|
workspace,
|
|
25
|
-
|
|
33
|
+
pane_target,
|
|
26
34
|
injection.get("pane_capture_tail") or "",
|
|
27
35
|
event_log,
|
|
28
|
-
state=
|
|
36
|
+
state=trust_state,
|
|
29
37
|
)
|
|
30
38
|
if not answer.get("answered"):
|
|
31
39
|
return injection
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Adding a provider idle/turn-state adapter
|
|
2
|
+
|
|
3
|
+
Gap 32 decides every node's idle/working/abnormal state from a deterministic
|
|
4
|
+
FILE FACT — the provider's own session-log/rollout turn-lifecycle records — never
|
|
5
|
+
from the pane screen. The predicate, abnormal track, and wake layers are
|
|
6
|
+
**provider-neutral and reused unchanged**. To support a brand-new CLI you fill the
|
|
7
|
+
small checklist below; you do not touch any neutral module.
|
|
8
|
+
|
|
9
|
+
## What you add (only two places)
|
|
10
|
+
|
|
11
|
+
1. `src/team_agent/provider_state/<provider>.py` — a thin reader that translates
|
|
12
|
+
that CLI's session records into normalized lifecycle facts.
|
|
13
|
+
2. one entry in `src/team_agent/provider_state/registry.py` — pure infra DATA.
|
|
14
|
+
|
|
15
|
+
Everything else (`idle_predicate.py`, `abnormal_track.py`, `wake.py`,
|
|
16
|
+
`idle_takeover.py`) is provider-neutral and must stay free of provider names
|
|
17
|
+
(there is a grep test, C6).
|
|
18
|
+
|
|
19
|
+
## The checklist
|
|
20
|
+
|
|
21
|
+
### 1. Session/rollout file location
|
|
22
|
+
- Where does this CLI write its per-session log? (root dir + path layout)
|
|
23
|
+
- How does the framework already learn each agent's path? (it is captured into
|
|
24
|
+
runtime state per agent as `rollout_path`; confirm yours lands there.)
|
|
25
|
+
- Record it under the registry entry `file_location`.
|
|
26
|
+
|
|
27
|
+
### 2. Turn-lifecycle event types (do the empirical capture FIRST)
|
|
28
|
+
Capture REAL records from a live session for each state and record the exact
|
|
29
|
+
record `type`/field. These become the contract fixtures (real-fixture-first):
|
|
30
|
+
- **turn-started / open turn** — the marker that a turn is in flight.
|
|
31
|
+
- **turn-complete** — the close that means idle.
|
|
32
|
+
- **interrupted** — user ESC / abort (idle_interrupted, idle + red note).
|
|
33
|
+
- **blocked / approval** — awaiting a human decision (blocked_on_human).
|
|
34
|
+
- **error / failed** — a structured terminal fault record.
|
|
35
|
+
Implement these as `extract_facts(records) -> (facts, diagnostics)` in your reader,
|
|
36
|
+
emitting `team_agent.provider_state.common` fact kinds: `TURN_OPEN`,
|
|
37
|
+
`TURN_COMPLETE`, `INTERRUPTED`, `FAILED`, `APPROVAL`, `ERROR`. Fault facts should
|
|
38
|
+
carry `signature`, `turn_id`, and `raw` (the original record). Filter out trailing
|
|
39
|
+
metadata/telemetry records so the verdict is the last LIFECYCLE fact, not the last
|
|
40
|
+
physical line.
|
|
41
|
+
|
|
42
|
+
Reference markers already implemented:
|
|
43
|
+
- Claude transcript: assistant `stop_reason==end_turn` (idle) / `==tool_use`
|
|
44
|
+
(working); user text `[Request interrupted by user]` (interrupted); user
|
|
45
|
+
`tool_result is_error==true` and system `subtype==api_error,level==error` (faults).
|
|
46
|
+
- Codex rollout: `event_msg payload.type==task_started|task_complete`;
|
|
47
|
+
`turn_aborted reason==interrupted`; app-server `turn.status==failed` and
|
|
48
|
+
`*/requestApproval`.
|
|
49
|
+
|
|
50
|
+
### 3. Black/white list seed entries
|
|
51
|
+
- `error_lists.whitelist` — record/string patterns that are benign → skip.
|
|
52
|
+
- `error_lists.blacklist` — known error signatures → notify (`api error`,
|
|
53
|
+
`rate limit`, `overloaded`, traceback/panic, provider `failed`, ...).
|
|
54
|
+
- Precedence is whitelist > blacklist > default-notify (catch-bias for structured
|
|
55
|
+
faults only). Lists are DATA — adding a pattern is one edit + one fixture.
|
|
56
|
+
|
|
57
|
+
### 4. Optional hook accelerator
|
|
58
|
+
- Does the CLI expose hooks that fire on turn boundaries (e.g. a `Stop`/`Notify`
|
|
59
|
+
program)? If so they can push a fact row to wake the watcher faster — but the
|
|
60
|
+
file fact remains the source of truth (the hook is validated against the file,
|
|
61
|
+
never the sole signal).
|
|
62
|
+
|
|
63
|
+
### 5. Process/identity facts for the liveness guard
|
|
64
|
+
- How to read the provider process identity (start-time / cmdline) so an open
|
|
65
|
+
turn whose process was replaced (PID reuse) classifies as `crashed_mid_turn`,
|
|
66
|
+
never eternal `working` (C4). `provider_state.common.process_is_live` already
|
|
67
|
+
implements the comparison given `{"expected": {...}, "current": {...}}`.
|
|
68
|
+
|
|
69
|
+
## Reused unchanged (do NOT modify per provider)
|
|
70
|
+
- `idle_predicate.evaluate_takeover_reminder` — all-idle + arm-after-delegation +
|
|
71
|
+
monotonic debounce + edge ack.
|
|
72
|
+
- `abnormal_track.process_abnormal_records` / `detect_whole_team_gone` — dedup,
|
|
73
|
+
catch-bias, coordinator-independent whole-team-gone.
|
|
74
|
+
- `wake` — file-change watch + mtime gate.
|
|
75
|
+
- `idle_takeover` — the public facade.
|
|
76
|
+
|
|
77
|
+
If you find yourself editing a neutral module to add a provider, stop — the fact
|
|
78
|
+
you need belongs in the reader or the registry entry instead.
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Provider turn-state readers behind one shared interface (Gap 32 §6).
|
|
2
|
+
|
|
3
|
+
``read_turn_state`` is the single entry the rest of the runtime uses; provider
|
|
4
|
+
dispatch happens here (and in registry data), so the neutral predicate /
|
|
5
|
+
abnormal / wake modules never name a provider.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import importlib
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from team_agent.provider_state.registry import get_provider_registry
|
|
14
|
+
|
|
15
|
+
_READER_CACHE: dict[str, Any] = {}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def read_turn_state(
    provider: str,
    session_log_text: str,
    *,
    process: Any = None,
    file_silence_seconds: float = 0,
    registry: Any = None,
) -> dict[str, Any]:
    """Classify one node's turn state from its provider session-log text.

    The reader module for ``provider`` is resolved through ``_reader_for``
    (passing ``registry`` along) and its ``classify`` function is called with
    the log text and the ``process`` snapshot.

    Returns the stable dict shape: state / turn_id / reason / source /
    annotations / diagnostics. When no reader can be resolved the result
    fails safe to ``unknown`` (never idle, Gap 32 C5) with reason
    ``unknown_provider``.

    ``file_silence_seconds`` is accepted but deliberately unused: an open
    turn beats silence (C14), so silence never forces idle.
    """
    del file_silence_seconds  # intentionally ignored, see docstring

    reader = _reader_for(provider, registry)
    if reader is not None:
        return reader.classify(session_log_text, process=process)

    # No reader for this provider: report it, but never claim idle.
    return {
        "state": "unknown",
        "turn_id": None,
        "reason": "unknown_provider",
        "source": "registry",
        "annotations": [],
        "diagnostics": [{"kind": "unknown_provider", "provider": provider}],
    }
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def read_fault_facts(provider: str, records: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return the fault/approval facts found in already-parsed provider records.

    The provider's reader module does the extraction; only facts whose
    ``kind`` is ``error``, ``failed`` or ``approval`` are kept. Each kept fact
    is shallow-copied and gets a ``provider`` key unless it already has one,
    so the abnormal track can consume the result without naming a provider.

    Returns an empty list when no reader is available for ``provider`` or the
    reader has no ``extract_facts`` attribute.
    """
    reader = _reader_for(provider)
    if reader is None or not hasattr(reader, "extract_facts"):
        return []

    all_facts, _unused_diagnostics = reader.extract_facts(records or [])

    wanted_kinds = {"error", "failed", "approval"}
    tagged: list[dict[str, Any]] = []
    for fact in all_facts:
        if fact.get("kind") not in wanted_kinds:
            continue
        copy = dict(fact)
        if "provider" not in copy:
            copy["provider"] = provider
        tagged.append(copy)
    return tagged
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _reader_for(provider: str, registry: Any = None) -> Any:
|
|
66
|
+
if provider in _READER_CACHE:
|
|
67
|
+
return _READER_CACHE[provider]
|
|
68
|
+
entry = None
|
|
69
|
+
if isinstance(registry, dict):
|
|
70
|
+
entry = registry.get(provider) if provider in registry else registry
|
|
71
|
+
if not isinstance(entry, dict) or "reader_module" not in entry:
|
|
72
|
+
entry = get_provider_registry(provider)
|
|
73
|
+
if not isinstance(entry, dict):
|
|
74
|
+
return None
|
|
75
|
+
module_name = entry.get("reader_module")
|
|
76
|
+
if not module_name:
|
|
77
|
+
return None
|
|
78
|
+
try:
|
|
79
|
+
module = importlib.import_module(module_name)
|
|
80
|
+
except ImportError:
|
|
81
|
+
return None
|
|
82
|
+
_READER_CACHE[provider] = module
|
|
83
|
+
return module
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
__all__ = ["read_turn_state", "read_fault_facts", "get_provider_registry"]
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Claude transcript reader — the ONLY Claude-specific turn-state knowledge.
|
|
2
|
+
|
|
3
|
+
Translates Claude transcript JSONL records into normalized lifecycle facts.
|
|
4
|
+
Real markers (see turn-state-markers-evidence.md):
|
|
5
|
+
- assistant message.stop_reason == "tool_use" -> open turn (working)
|
|
6
|
+
- assistant message.stop_reason == "end_turn" -> turn complete (idle)
|
|
7
|
+
- user text == "[Request interrupted by user]" -> interrupted
|
|
8
|
+
- user tool_result is_error == true -> structured tool error
|
|
9
|
+
- system subtype == "api_error" and level=="error" -> provider api error
|
|
10
|
+
Trailing metadata records (stop_hook_summary / turn_duration / last-prompt /
|
|
11
|
+
ai-title / permission-mode / ...) are ignored for the turn verdict.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
from team_agent.provider_state import common
|
|
19
|
+
|
|
20
|
+
_INTERRUPT_TEXT = "[Request interrupted by user]"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def extract_facts(records: list[dict[str, Any]]) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Translate Claude transcript records into normalized lifecycle facts.

    Returns ``(facts, diagnostics)``. Facts appear in record order; this
    reader never adds a diagnostic, so the second list is returned empty.
    Records matching none of the cases below (metadata, snapshots, titles,
    ...) contribute nothing to the verdict.
    """
    facts: list[dict[str, Any]] = []
    diagnostics: list[dict[str, Any]] = []

    for record in records:
        record_type = record.get("type")
        message = record.get("message")
        has_message = isinstance(message, dict)

        if record_type == "assistant" and has_message:
            stop_reason = message.get("stop_reason")
            turn_id = record.get("requestId") or record.get("uuid")
            recognized = (
                ("end_turn", common.TURN_COMPLETE),
                ("tool_use", common.TURN_OPEN),
                ("stop_sequence", common.TURN_COMPLETE),
            )
            for wanted, kind in recognized:
                if stop_reason == wanted:
                    facts.append({"kind": kind, "turn_id": turn_id, "reason": wanted})
                    break
            else:
                # A missing stop_reason with list content is an in-flight
                # assistant fragment; any other stop_reason emits no fact.
                if stop_reason is None and isinstance(message.get("content"), list):
                    facts.append(
                        {"kind": common.TURN_OPEN, "turn_id": turn_id, "reason": "assistant_in_flight"}
                    )
            continue

        if record_type == "user" and has_message:
            content = message.get("content")
            if _content_has_interrupt(content):
                facts.append(
                    {"kind": common.INTERRUPTED, "turn_id": record.get("uuid"), "reason": "user_interrupt"}
                )
            elif _content_has_tool_error(content):
                # Keyed on the parent record so retries of the same turn
                # share one id (C8 dedup).
                affected_turn = record.get("parentUuid") or record.get("uuid")
                facts.append(
                    {
                        "kind": common.ERROR,
                        "turn_id": affected_turn,
                        "reason": "tool_result_is_error",
                        "signature": "tool_result_is_error",
                        "raw": record,
                    }
                )
            continue

        is_api_error = (
            record_type == "system"
            and record.get("subtype") == "api_error"
            and record.get("level") == "error"
        )
        if is_api_error:
            # api_error retries within a session dedup on (signature, session) (C8).
            session_key = record.get("sessionId") or record.get("parentUuid") or record.get("uuid")
            facts.append(
                {
                    "kind": common.ERROR,
                    "turn_id": session_key,
                    "reason": "api_error",
                    "signature": "api_error",
                    "raw": record,
                }
            )

    return facts, diagnostics
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def classify(session_log_text: str, *, process: Any = None) -> dict[str, Any]:
    """Classify a Claude session log by running this module's ``extract_facts``
    through the shared ``common.classify_with_reader`` pipeline."""
    verdict = common.classify_with_reader(extract_facts, session_log_text, process=process)
    return verdict
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _content_has_interrupt(content: Any) -> bool:
|
|
72
|
+
if not isinstance(content, list):
|
|
73
|
+
return False
|
|
74
|
+
for item in content:
|
|
75
|
+
if isinstance(item, dict) and item.get("type") == "text" and item.get("text") == _INTERRUPT_TEXT:
|
|
76
|
+
return True
|
|
77
|
+
return False
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _content_has_tool_error(content: Any) -> bool:
|
|
81
|
+
if not isinstance(content, list):
|
|
82
|
+
return False
|
|
83
|
+
for item in content:
|
|
84
|
+
if isinstance(item, dict) and item.get("type") == "tool_result" and item.get("is_error") is True:
|
|
85
|
+
return True
|
|
86
|
+
return False
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Codex rollout reader — the ONLY Codex-specific turn-state knowledge.
|
|
2
|
+
|
|
3
|
+
Translates Codex rollout JSONL (and app-server jsonrpc) records into normalized
|
|
4
|
+
lifecycle facts. Real markers (see turn-state-markers-evidence.md):
|
|
5
|
+
- event_msg payload.type == "task_started" -> open turn (working)
|
|
6
|
+
- event_msg payload.type == "task_complete" -> turn complete (idle)
|
|
7
|
+
- event_msg payload.type == "turn_aborted" reason=="interrupted" -> interrupted
|
|
8
|
+
App-server schema-derived markers:
|
|
9
|
+
- method "turn/completed" params.turn.status == "failed" -> failed/error
|
|
10
|
+
- method ".../requestApproval" -> approval block
|
|
11
|
+
Telemetry (token_count, agent_message, patch_apply_end, ...) is not a close.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
from team_agent.provider_state import common
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def extract_facts(records: list[dict[str, Any]]) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Translate Codex rollout / app-server records into normalized lifecycle facts.

    Returns ``(facts, diagnostics)``. Facts appear in record order; this
    reader never adds a diagnostic, so the second list is returned empty.
    ``event_msg`` records with a dict payload are read as rollout events;
    other records are tried as app-server jsonrpc messages. Anything else
    (response items, token counts, ...) is not a verdict and emits no fact.
    """
    facts: list[dict[str, Any]] = []
    diagnostics: list[dict[str, Any]] = []

    for record in records:
        payload = record.get("payload")

        if record.get("type") == "event_msg" and isinstance(payload, dict):
            event = payload.get("type")
            turn_id = payload.get("turn_id")
            if event == "task_started":
                facts.append({"kind": common.TURN_OPEN, "turn_id": turn_id, "reason": "task_started"})
            elif event == "task_complete":
                facts.append({"kind": common.TURN_COMPLETE, "turn_id": turn_id, "reason": "task_complete"})
            elif event == "turn_aborted":
                # Every abort is reported as interrupted; the payload's own
                # reason is kept when present, else "aborted".
                abort_reason = str(payload.get("reason") or "aborted")
                facts.append({"kind": common.INTERRUPTED, "turn_id": turn_id, "reason": abort_reason})
            continue

        if _is_app_server(record):
            fact = _app_server_fact(record)
            if fact is not None:
                facts.append(fact)

    return facts, diagnostics
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def classify(session_log_text: str, *, process: Any = None) -> dict[str, Any]:
    """Classify a Codex session log by running this module's ``extract_facts``
    through the shared ``common.classify_with_reader`` pipeline."""
    verdict = common.classify_with_reader(extract_facts, session_log_text, process=process)
    return verdict
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _is_app_server(record: dict[str, Any]) -> bool:
|
|
51
|
+
return record.get("jsonrpc") == "2.0" and isinstance(record.get("method"), str)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _app_server_fact(record: dict[str, Any]) -> dict[str, Any] | None:
    """Map one app-server jsonrpc record to a lifecycle fact, or ``None``.

    - method ending in ``requestApproval`` -> approval fact (with raw record)
    - method ``turn/completed`` -> fact chosen by ``params.turn.status``:
      ``failed`` (with raw record), ``completed``, ``interrupted`` or
      ``inProgress``; any other status yields ``None``
    - any other method -> ``None``
    """
    method = str(record.get("method") or "")
    raw_params = record.get("params")
    params = raw_params if isinstance(raw_params, dict) else {}

    if method.endswith("requestApproval"):
        return {
            "kind": common.APPROVAL,
            "turn_id": params.get("turnId") or params.get("turn_id"),
            "reason": "approval_required",
            "signature": "approval_required",
            "raw": record,
        }

    if method != "turn/completed":
        return None

    raw_turn = params.get("turn")
    turn = raw_turn if isinstance(raw_turn, dict) else {}
    status = turn.get("status")
    turn_id = turn.get("id")

    if status == "failed":
        return {
            "kind": common.FAILED,
            "turn_id": turn_id,
            "reason": "turn_failed",
            "signature": "turn_failed",
            "raw": record,
        }

    # Remaining statuses map to a plain fact: (status, fact kind, reason).
    plain_statuses = (
        ("completed", common.TURN_COMPLETE, "completed"),
        ("interrupted", common.INTERRUPTED, "interrupted"),
        ("inProgress", common.TURN_OPEN, "in_progress"),
    )
    for wanted, kind, reason in plain_statuses:
        if status == wanted:
            return {"kind": kind, "turn_id": turn_id, "reason": reason}
    return None
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"""Shared, provider-neutral plumbing for the turn-state readers.
|
|
2
|
+
|
|
3
|
+
The per-provider readers (claude.py, codex.py) only translate their own record
|
|
4
|
+
shapes into a normalized list of lifecycle facts; everything else — JSONL
|
|
5
|
+
tail parsing, metadata filtering wiring, the verdict decision, and the
|
|
6
|
+
process-identity liveness guard — lives here so it is written once.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
from typing import Any, Callable
|
|
13
|
+
|
|
14
|
+
# Normalized lifecycle fact kinds emitted by every reader.
|
|
15
|
+
TURN_OPEN = "turn_open"
|
|
16
|
+
TURN_COMPLETE = "turn_complete"
|
|
17
|
+
INTERRUPTED = "interrupted"
|
|
18
|
+
FAILED = "failed"
|
|
19
|
+
APPROVAL = "approval"
|
|
20
|
+
ERROR = "error" # non-closing structured error (e.g. transient api retry / tool is_error)
|
|
21
|
+
|
|
22
|
+
_CLOSING = {TURN_COMPLETE, INTERRUPTED, FAILED}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def parse_jsonl(text: str) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Parse JSONL text into ``(records, parse_diagnostics)``.

    Blank lines are skipped. A line that is not valid JSON, or that decodes to
    something other than an object, is reported as a diagnostic carrying its
    1-based line number instead of raising — the caller decides whether a
    populated diagnostics list with zero usable records means ``unknown``.

    Lines are split on ``"\\n"`` only. ``str.splitlines()`` also breaks on
    U+2028, U+2029 and U+0085, all of which may appear unescaped inside a JSON
    string; using it would cut such a record into two undecodable fragments
    and drop it. A trailing ``"\\r"`` from CRLF input is removed by ``strip``.
    """
    records: list[dict[str, Any]] = []
    diagnostics: list[dict[str, Any]] = []
    for lineno, raw in enumerate((text or "").split("\n"), start=1):
        line = raw.strip()
        if not line:
            continue
        try:
            obj = json.loads(line)
        except (ValueError, TypeError):
            diagnostics.append({"kind": "json_decode_error", "line": lineno})
            continue
        if not isinstance(obj, dict):
            diagnostics.append({"kind": "non_object_record", "line": lineno})
            continue
        records.append(obj)
    return records, diagnostics
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def decide_state(
    facts: list[dict[str, Any]],
    *,
    process: Any = None,
    parse_diagnostics: list[dict[str, Any]] | None = None,
    had_records: bool,
    extra_diagnostics: list[dict[str, Any]] | None = None,
) -> dict[str, Any]:
    """Reduce a normalized fact stream to the public classify result.

    The verdict comes from the LAST lifecycle fact (open, complete,
    interrupted, failed or approval), not from the last physical record;
    non-lifecycle facts such as ``error`` are ignored here.

    - no lifecycle fact        -> ``unknown`` (C5 fail-safe)
    - complete                 -> ``idle``
    - interrupted              -> ``idle_interrupted``
    - failed                   -> ``abnormal``
    - approval                 -> ``blocked_on_human``
    - open, process alive      -> ``working`` (survives file silence, C14)
    - open, process dead       -> ``abnormal`` / ``crashed_mid_turn`` (C4)
    - open, process unverified -> ``unknown``, never ``working``

    Parse and reader diagnostics are concatenated and returned with every
    result.
    """
    diagnostics = [*(parse_diagnostics or []), *(extra_diagnostics or [])]

    lifecycle_kinds = _CLOSING | {TURN_OPEN, APPROVAL}
    last = None
    for fact in facts:
        if fact.get("kind") in lifecycle_kinds:
            last = fact

    if last is None:
        # Nothing to judge from: name the most specific cause we can.
        if not had_records:
            reason = "unreadable_or_empty"
        elif diagnostics:
            reason = "unrecognized_format"
        else:
            reason = "no_turn_lifecycle_fact"
        return _result("unknown", None, reason, "session_file", [], diagnostics)

    kind = last.get("kind")
    turn_id = last.get("turn_id")
    reason = str(last.get("reason") or kind)

    # kind -> (state, fallback reason, annotations) for facts that settle the
    # verdict without consulting the process.
    settled = {
        TURN_COMPLETE: ("idle", "turn_complete", []),
        INTERRUPTED: ("idle_interrupted", "interrupted", ["interrupted"]),
        FAILED: ("abnormal", "turn_failed", ["turn_failed"]),
        APPROVAL: ("blocked_on_human", "approval_required", ["awaiting_approval"]),
    }
    if kind in settled:
        state, fallback_reason, annotations = settled[kind]
        return _result(state, turn_id, reason or fallback_reason, "session_file", annotations, diagnostics)

    # Open turn with no later close. "working" requires POSITIVE confirmation
    # that the recorded process is still alive (C4 fail-safe).
    verdict, live_reason, live_diag = process_liveness(process)
    if live_diag:
        diagnostics = [*diagnostics, live_diag]

    if verdict == "alive":
        return _result("working", turn_id, "open_turn", "session_file", [], diagnostics)
    elif verdict == "dead":
        return _result(
            "abnormal",
            turn_id,
            "crashed_mid_turn",
            "process_guard",
            ["crashed_mid_turn", live_reason],
            diagnostics,
        )
    # Unverifiable: cannot confirm alive, so fail safe to unknown.
    return _result(
        "unknown",
        turn_id,
        "process_identity_unverified",
        "process_guard",
        ["process_identity_unverified", live_reason],
        diagnostics,
    )
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
_STRONG_IDENTITY_FIELDS = ("start_time", "cmdline", "create_time")
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def process_liveness(process: Any) -> tuple[str, str, dict[str, Any] | None]:
|
|
110
|
+
"""Process-identity liveness guard (Gap 32 C4) — three-valued.
|
|
111
|
+
|
|
112
|
+
Returns (verdict, reason, diagnostic) where verdict is one of:
|
|
113
|
+
- ``"alive"`` — positively confirmed the same process is running
|
|
114
|
+
- ``"dead"`` — confirmed replaced/exited (identity mismatch or flag)
|
|
115
|
+
- ``"unverifiable"`` — identity missing/partial; CANNOT be read as working
|
|
116
|
+
|
|
117
|
+
Identity, not bare PID: aliveness must be affirmatively confirmed by a strong
|
|
118
|
+
identity field (start_time / cmdline / create_time) present and equal in BOTH
|
|
119
|
+
the recorded and the current snapshot. Missing/partial info is fail-safe
|
|
120
|
+
unverifiable, never optimistically "alive".
|
|
121
|
+
|
|
122
|
+
Accepted ``process`` shapes (any one):
|
|
123
|
+
- None / non-dict → unverifiable
|
|
124
|
+
- {"alive"|"running": bool} → explicit
|
|
125
|
+
- {"identity_match": bool} → explicit identity verdict
|
|
126
|
+
- {"expected"|"recorded": {...}, "current"|"observed": {...}}
|
|
127
|
+
"""
|
|
128
|
+
if process is None or not isinstance(process, dict):
|
|
129
|
+
return "unverifiable", "process_identity_missing", {"kind": "process_identity_unverified"}
|
|
130
|
+
if process.get("alive") is False or process.get("running") is False:
|
|
131
|
+
return "dead", "process_not_running", {"kind": "process_dead", "detail": "not_running"}
|
|
132
|
+
if process.get("identity_match") is False:
|
|
133
|
+
return "dead", "process_identity_mismatch", {"kind": "process_identity_mismatch"}
|
|
134
|
+
if process.get("alive") is True or process.get("running") is True or process.get("identity_match") is True:
|
|
135
|
+
return "alive", "process_alive", None
|
|
136
|
+
recorded = process.get("recorded") if isinstance(process.get("recorded"), dict) else process.get("expected")
|
|
137
|
+
current = process.get("current") if isinstance(process.get("current"), dict) else process.get("observed")
|
|
138
|
+
if not (isinstance(recorded, dict) and isinstance(current, dict)):
|
|
139
|
+
return "unverifiable", "process_identity_partial", {"kind": "process_identity_unverified"}
|
|
140
|
+
if current.get("alive") is False or current.get("running") is False:
|
|
141
|
+
return "dead", "process_not_running", {"kind": "process_dead", "detail": "current_not_running"}
|
|
142
|
+
# Any shared strong identity field that DIFFERS = confirmed replacement.
|
|
143
|
+
for key in _STRONG_IDENTITY_FIELDS:
|
|
144
|
+
if key in recorded and key in current and recorded.get(key) != current.get(key):
|
|
145
|
+
return "dead", f"process_identity_mismatch:{key}", {
|
|
146
|
+
"kind": "process_identity_mismatch",
|
|
147
|
+
"field": key,
|
|
148
|
+
"recorded": recorded.get(key),
|
|
149
|
+
"current": current.get(key),
|
|
150
|
+
}
|
|
151
|
+
# Require at least one strong identity field present+equal in BOTH, with no
|
|
152
|
+
# recorded strong field missing from current (else we cannot confirm).
|
|
153
|
+
recorded_strong = [k for k in _STRONG_IDENTITY_FIELDS if k in recorded]
|
|
154
|
+
confirmed = [k for k in recorded_strong if k in current and recorded.get(k) == current.get(k)]
|
|
155
|
+
missing = [k for k in recorded_strong if k not in current]
|
|
156
|
+
if confirmed and not missing:
|
|
157
|
+
return "alive", "process_identity_match", None
|
|
158
|
+
return "unverifiable", "process_identity_partial", {
|
|
159
|
+
"kind": "process_identity_unverified",
|
|
160
|
+
"recorded_strong": recorded_strong,
|
|
161
|
+
"confirmed": confirmed,
|
|
162
|
+
"missing": missing,
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def process_is_live(process: Any) -> tuple[bool, str, dict[str, Any] | None]:
    """Boolean view of ``process_liveness`` for conservative callers (e.g.
    whole-team-gone).

    A process counts as live unless it is CONFIRMED dead; an unverifiable
    process is reported live so the team is never falsely declared gone. The
    reason and diagnostic from ``process_liveness`` are passed through.
    """
    verdict, reason, diag = process_liveness(process)
    confirmed_dead = verdict == "dead"
    return (not confirmed_dead), reason, diag
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _result(
|
|
175
|
+
state: str,
|
|
176
|
+
turn_id: str | None,
|
|
177
|
+
reason: str,
|
|
178
|
+
source: str,
|
|
179
|
+
annotations: list[str],
|
|
180
|
+
diagnostics: list[dict[str, Any]],
|
|
181
|
+
) -> dict[str, Any]:
|
|
182
|
+
return {
|
|
183
|
+
"state": state,
|
|
184
|
+
"turn_id": turn_id,
|
|
185
|
+
"reason": reason,
|
|
186
|
+
"source": source,
|
|
187
|
+
"annotations": list(annotations),
|
|
188
|
+
"diagnostics": list(diagnostics),
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def classify_with_reader(
    extract_facts: Callable[[list[dict[str, Any]]], tuple[list[dict[str, Any]], list[dict[str, Any]]]],
    session_log_text: str,
    *,
    process: Any = None,
) -> dict[str, Any]:
    """Run a provider reader's fact extractor through the shared pipeline.

    Steps: parse the log text as JSONL (a falsy text is treated as empty),
    hand the parsed records to ``extract_facts``, then let ``decide_state``
    produce the verdict from the facts, the process snapshot and both
    diagnostics lists.
    """
    parsed_records, parse_problems = parse_jsonl(session_log_text or "")
    lifecycle_facts, reader_problems = extract_facts(parsed_records)
    verdict = decide_state(
        lifecycle_facts,
        process=process,
        parse_diagnostics=parse_problems,
        had_records=bool(parsed_records),
        extra_diagnostics=reader_problems,
    )
    return verdict
|