@team-agent/installer 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/package.json +1 -1
  2. package/src/team_agent/abnormal_track.py +253 -0
  3. package/src/team_agent/cli/commands.py +17 -1
  4. package/src/team_agent/cli/parser.py +2 -2
  5. package/src/team_agent/compiler.py +1 -1
  6. package/src/team_agent/coordinator/lifecycle.py +20 -2
  7. package/src/team_agent/display/__init__.py +31 -0
  8. package/src/team_agent/display/adaptive.py +425 -0
  9. package/src/team_agent/display/backend.py +46 -0
  10. package/src/team_agent/display/close.py +6 -0
  11. package/src/team_agent/display/rebuild.py +102 -0
  12. package/src/team_agent/display/tiling.py +156 -0
  13. package/src/team_agent/display/worker_window.py +4 -0
  14. package/src/team_agent/display/workspace.py +36 -127
  15. package/src/team_agent/idle_predicate.py +200 -0
  16. package/src/team_agent/idle_takeover.py +59 -0
  17. package/src/team_agent/idle_takeover_wiring.py +111 -0
  18. package/src/team_agent/launch/core.py +13 -4
  19. package/src/team_agent/leader/__init__.py +444 -61
  20. package/src/team_agent/message_store/agent_health.py +6 -2
  21. package/src/team_agent/message_store/core.py +51 -18
  22. package/src/team_agent/message_store/leader_notification_log.py +63 -38
  23. package/src/team_agent/message_store/result_watchers.py +17 -11
  24. package/src/team_agent/message_store/schema.py +19 -2
  25. package/src/team_agent/message_store/schema_migration.py +386 -0
  26. package/src/team_agent/messaging/delivery.py +45 -2
  27. package/src/team_agent/messaging/leader_panes.py +115 -21
  28. package/src/team_agent/messaging/send.py +33 -0
  29. package/src/team_agent/messaging/tmux_io.py +49 -10
  30. package/src/team_agent/messaging/trust_auto_answer.py +11 -3
  31. package/src/team_agent/provider_state/README.md +78 -0
  32. package/src/team_agent/provider_state/__init__.py +86 -0
  33. package/src/team_agent/provider_state/claude.py +86 -0
  34. package/src/team_agent/provider_state/codex.py +84 -0
  35. package/src/team_agent/provider_state/common.py +207 -0
  36. package/src/team_agent/provider_state/registry.py +118 -0
  37. package/src/team_agent/restart/orchestration.py +9 -9
  38. package/src/team_agent/runtime.py +62 -12
  39. package/src/team_agent/spec.py +4 -3
  40. package/src/team_agent/wake.py +58 -0
@@ -32,6 +32,49 @@ def _tmux_inject_text(
32
32
  *,
33
33
  bypass_non_input_gate: bool = False,
34
34
  ) -> dict[str, Any]:
35
+ # Round-5 follow-up: empty-text Enter path (used by trust auto-answer to
36
+ # accept Codex's default `1. Yes, continue` choice with a plain Enter).
37
+ # tmux rejects set-buffer / paste-buffer of an empty string, so the
38
+ # buffer-paste route would leave the trust prompt stuck. Issue
39
+ # `send-keys -t <target> <submit_key>` directly and bypass the buffer
40
+ # path entirely.
41
+ if text == "":
42
+ proc = run_cmd(["tmux", "send-keys", "-t", target, submit_key], timeout=10)
43
+ if proc.returncode != 0:
44
+ return {
45
+ "ok": False,
46
+ "stage": "send-keys",
47
+ "error": proc.stderr.strip() or "tmux send-keys failed",
48
+ "attempts": [
49
+ {
50
+ "attempt": 1,
51
+ "submitted": False,
52
+ "verification": "send_keys_failed",
53
+ "submit_key": submit_key,
54
+ }
55
+ ],
56
+ "verification": "send_keys_failed",
57
+ }
58
+ return {
59
+ "ok": True,
60
+ "stage": "submitted",
61
+ "visible": True,
62
+ "submitted": True,
63
+ "verification": "empty_text_send_keys",
64
+ "submit_verification": f"{submit_key}_sent_direct",
65
+ "turn_verification": "not_required",
66
+ "attempts": [
67
+ {
68
+ "attempt": 1,
69
+ "submitted": True,
70
+ "verification": "empty_text_send_keys",
71
+ "submit_key": submit_key,
72
+ }
73
+ ],
74
+ "submit_attempts": [
75
+ {"attempt": 1, "submitted": True, "verification": "send_keys"}
76
+ ],
77
+ }
35
78
  token_match = re.search(r"\[team-agent-token:([^\]]+)\]", text)
36
79
  token = token_match.group(1) if token_match else ""
37
80
  attempt_log: list[dict[str, Any]] = []
@@ -134,6 +177,11 @@ def _tmux_inject_text(
134
177
  "submit_attempts": submit.get("attempts"),
135
178
  }
136
179
  submit_verification = _leader_submit_verification(submit.get("verification"), verification, submit_key)
180
+ # Gap 42: paste+submit success is authoritative for delivery. The post-submit
181
+ # turn-boundary probe is observation metadata only, never a delivery gate — a
182
+ # busy / compacting recipient that has not yet shown a new prompt marker is
183
+ # still a successful delivery. Real paste/submit failures are caught and
184
+ # returned above; this point is only reached after submit reported ok.
137
185
  turn_visible, turn_verification, turn_capture = _wait_for_leader_new_turn(
138
186
  target,
139
187
  text,
@@ -142,16 +190,7 @@ def _tmux_inject_text(
142
190
  timeout=2.0,
143
191
  )
144
192
  if not turn_visible:
145
- return {
146
- "ok": False,
147
- "stage": "turn-boundary-verification",
148
- "error": f"leader turn boundary not verified: {turn_verification}",
149
- "attempts": attempt_log,
150
- "verification": verification,
151
- "submit_verification": submit_verification,
152
- "turn_verification": turn_verification,
153
- "submit_attempts": submit.get("attempts"),
154
- }
193
+ turn_verification = "not_yet_observed"
155
194
  return {
156
195
  "ok": True,
157
196
  "stage": "submitted",
@@ -18,14 +18,22 @@ def retry_injection_after_trust_auto_answer(
18
18
  buffer_name: str,
19
19
  provider: str,
20
20
  ) -> dict[str, Any]:
21
- from team_agent.messaging.delivery import _wait_for_trust_prompt_dismissal
21
+ from team_agent.messaging.delivery import _tmux_pane_width, _wait_for_trust_prompt_dismissal
22
22
  from team_agent.messaging.leader_panes import attempt_trust_auto_answer
23
+ pane_target = injection.get("pane_id") or target
24
+ # Live wiring: query tmux pane width now and pass via state["pane_width"]
25
+ # (symmetric with _deliver_pending_message). Fail-safe on query failure —
26
+ # leave pane_width absent so the matcher falls back to exact equality.
27
+ width_query = _tmux_pane_width(pane_target)
28
+ trust_state = dict(state) if isinstance(state, dict) else {}
29
+ if width_query.get("ok"):
30
+ trust_state["pane_width"] = width_query["pane_width"]
23
31
  answer = attempt_trust_auto_answer(
24
32
  workspace,
25
- injection.get("pane_id") or target,
33
+ pane_target,
26
34
  injection.get("pane_capture_tail") or "",
27
35
  event_log,
28
- state=state,
36
+ state=trust_state,
29
37
  )
30
38
  if not answer.get("answered"):
31
39
  return injection
@@ -0,0 +1,78 @@
1
+ # Adding a provider idle/turn-state adapter
2
+
3
+ Gap 32 decides every node's idle/working/abnormal state from a deterministic
4
+ FILE FACT — the provider's own session-log/rollout turn-lifecycle records — never
5
+ from the pane screen. The predicate, abnormal track, and wake layers are
6
+ **provider-neutral and reused unchanged**. To support a brand-new CLI you fill the
7
+ small checklist below; you do not touch any neutral module.
8
+
9
+ ## What you add (only two places)
10
+
11
+ 1. `src/team_agent/provider_state/<provider>.py` — a thin reader that translates
12
+ that CLI's session records into normalized lifecycle facts.
13
+ 2. one entry in `src/team_agent/provider_state/registry.py` — pure infra DATA.
14
+
15
+ Everything else (`idle_predicate.py`, `abnormal_track.py`, `wake.py`,
16
+ `idle_takeover.py`) is provider-neutral and must stay free of provider names
17
+ (there is a grep test, C6).
18
+
19
+ ## The checklist
20
+
21
+ ### 1. Session/rollout file location
22
+ - Where does this CLI write its per-session log? (root dir + path layout)
23
+ - How does the framework already learn each agent's path? (it is captured into
24
+ runtime state per agent as `rollout_path`; confirm yours lands there.)
25
+ - Record it under the registry entry `file_location`.
26
+
27
+ ### 2. Turn-lifecycle event types (do the empirical capture FIRST)
28
+ Capture REAL records from a live session for each state and record the exact
29
+ record `type`/field. These become the contract fixtures (real-fixture-first):
30
+ - **turn-started / open turn** — the marker that a turn is in flight.
31
+ - **turn-complete** — the close that means idle.
32
+ - **interrupted** — user ESC / abort (idle_interrupted, idle + red note).
33
+ - **blocked / approval** — awaiting a human decision (blocked_on_human).
34
+ - **error / failed** — a structured terminal fault record.
35
+ Implement these as `extract_facts(records) -> (facts, diagnostics)` in your reader,
36
+ emitting `team_agent.provider_state.common` fact kinds: `TURN_OPEN`,
37
+ `TURN_COMPLETE`, `INTERRUPTED`, `FAILED`, `APPROVAL`, `ERROR`. Fault facts should
38
+ carry `signature`, `turn_id`, and `raw` (the original record). Filter out trailing
39
+ metadata/telemetry records so the verdict is the last LIFECYCLE fact, not the last
40
+ physical line.
41
+
42
+ Reference markers already implemented:
43
+ - Claude transcript: assistant `stop_reason==end_turn` (idle) / `==tool_use`
44
+ (working); user text `[Request interrupted by user]` (interrupted); user
45
+ `tool_result is_error==true` and system `subtype==api_error,level==error` (faults).
46
+ - Codex rollout: `event_msg payload.type==task_started|task_complete`;
47
+ `turn_aborted reason==interrupted`; app-server `turn.status==failed` and
48
+ `*/requestApproval`.
49
+
50
+ ### 3. Black/white list seed entries
51
+ - `error_lists.whitelist` — record/string patterns that are benign → skip.
52
+ - `error_lists.blacklist` — known error signatures → notify (`api error`,
53
+ `rate limit`, `overloaded`, traceback/panic, provider `failed`, ...).
54
+ - Precedence is whitelist > blacklist > default-notify (catch-bias for structured
55
+ faults only). Lists are DATA — adding a pattern is one edit + one fixture.
56
+
57
+ ### 4. Optional hook accelerator
58
+ - Does the CLI expose hooks that fire on turn boundaries (e.g. a `Stop`/`Notify`
59
+ program)? If so they can push a fact row to wake the watcher faster — but the
60
+ file fact remains the source of truth (the hook is validated against the file,
61
+ never the sole signal).
62
+
63
+ ### 5. Process/identity facts for the liveness guard
64
+ - How to read the provider process identity (start-time / cmdline) so an open
65
+ turn whose process was replaced (PID reuse) classifies as `crashed_mid_turn`,
66
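+ never eternal `working` (C4). `provider_state.common.process_liveness` already
67
+ implements the three-valued comparison given `{"expected": {...}, "current": {...}}`; `process_is_live` is only its conservative boolean wrapper (unverifiable counts as live).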
68
+
69
+ ## Reused unchanged (do NOT modify per provider)
70
+ - `idle_predicate.evaluate_takeover_reminder` — all-idle + arm-after-delegation +
71
+ monotonic debounce + edge ack.
72
+ - `abnormal_track.process_abnormal_records` / `detect_whole_team_gone` — dedup,
73
+ catch-bias, coordinator-independent whole-team-gone.
74
+ - `wake` — file-change watch + mtime gate.
75
+ - `idle_takeover` — the public facade.
76
+
77
+ If you find yourself editing a neutral module to add a provider, stop — the fact
78
+ you need belongs in the reader or the registry entry instead.
@@ -0,0 +1,86 @@
1
+ """Provider turn-state readers behind one shared interface (Gap 32 §6).
2
+
3
+ ``read_turn_state`` is the single entry the rest of the runtime uses; provider
4
+ dispatch happens here (and in registry data), so the neutral predicate /
5
+ abnormal / wake modules never name a provider.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import importlib
11
+ from typing import Any
12
+
13
+ from team_agent.provider_state.registry import get_provider_registry
14
+
15
+ _READER_CACHE: dict[str, Any] = {}
16
+
17
+
18
def read_turn_state(
    provider: str,
    session_log_text: str,
    *,
    process: Any = None,
    file_silence_seconds: float = 0,
    registry: Any = None,
) -> dict[str, Any]:
    """Resolve the provider's reader and classify a node's turn state.

    Args:
        provider: Provider key used to look up the reader module.
        session_log_text: Raw text of the provider's session log.
        process: Optional process snapshot, forwarded untouched to the
            reader's ``classify``.
        file_silence_seconds: Accepted for interface compatibility only; it
            is deliberately not consulted (open-turn beats silence, C14 —
            silence never forces idle).
        registry: Optional registry override handed to ``_reader_for``.

    Returns:
        Whatever the resolved reader's ``classify`` returns. When no reader
        can be resolved, a fail-safe ``unknown`` result with the keys
        state / turn_id / reason / source / annotations / diagnostics
        (never idle, Gap 32 C5).
    """
    _ = file_silence_seconds  # intentionally unused, see docstring
    adapter = _reader_for(provider, registry)
    if adapter is not None:
        return adapter.classify(session_log_text, process=process)

    # No reader: report the provider we could not resolve instead of guessing.
    unresolved = {"kind": "unknown_provider", "provider": provider}
    return {
        "state": "unknown",
        "turn_id": None,
        "reason": "unknown_provider",
        "source": "registry",
        "annotations": [],
        "diagnostics": [unresolved],
    }
44
+
45
+
46
def read_fault_facts(provider: str, records: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return the fault/approval facts a provider reader extracts from records.

    The provider's reader is resolved via ``_reader_for``; if there is no
    reader, or it has no ``extract_facts`` attribute, the result is empty.
    Only facts whose ``kind`` is ``error``, ``failed`` or ``approval`` are
    kept. Each kept fact is shallow-copied and given a ``provider`` key when
    it does not already carry one, so callers can consume them without
    naming a provider themselves.
    """
    adapter = _reader_for(provider)
    if adapter is None or not hasattr(adapter, "extract_facts"):
        return []

    extracted, _unused_diagnostics = adapter.extract_facts(records or [])
    fault_kinds = {"error", "failed", "approval"}
    tagged: list[dict[str, Any]] = []
    for fact in extracted:
        if fact.get("kind") not in fault_kinds:
            continue
        copy = dict(fact)
        if "provider" not in copy:
            copy["provider"] = provider
        tagged.append(copy)
    return tagged
63
+
64
+
65
+ def _reader_for(provider: str, registry: Any = None) -> Any:
66
+ if provider in _READER_CACHE:
67
+ return _READER_CACHE[provider]
68
+ entry = None
69
+ if isinstance(registry, dict):
70
+ entry = registry.get(provider) if provider in registry else registry
71
+ if not isinstance(entry, dict) or "reader_module" not in entry:
72
+ entry = get_provider_registry(provider)
73
+ if not isinstance(entry, dict):
74
+ return None
75
+ module_name = entry.get("reader_module")
76
+ if not module_name:
77
+ return None
78
+ try:
79
+ module = importlib.import_module(module_name)
80
+ except ImportError:
81
+ return None
82
+ _READER_CACHE[provider] = module
83
+ return module
84
+
85
+
86
+ __all__ = ["read_turn_state", "read_fault_facts", "get_provider_registry"]
@@ -0,0 +1,86 @@
1
+ """Claude transcript reader — the ONLY Claude-specific turn-state knowledge.
2
+
3
+ Translates Claude transcript JSONL records into normalized lifecycle facts.
4
+ Real markers (see turn-state-markers-evidence.md):
5
+ - assistant message.stop_reason == "tool_use" -> open turn (working)
6
+ - assistant message.stop_reason == "end_turn" -> turn complete (idle)
7
+ - user text == "[Request interrupted by user]" -> interrupted
8
+ - user tool_result is_error == true -> structured tool error
9
+ - system subtype == "api_error" and level=="error" -> provider api error
10
+ Trailing metadata records (stop_hook_summary / turn_duration / last-prompt /
11
+ ai-title / permission-mode / ...) are ignored for the turn verdict.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from typing import Any
17
+
18
+ from team_agent.provider_state import common
19
+
20
+ _INTERRUPT_TEXT = "[Request interrupted by user]"
21
+
22
+
23
def extract_facts(records: list[dict[str, Any]]) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Translate Claude transcript records into normalized lifecycle facts.

    Returns ``(facts, diagnostics)``. ``diagnostics`` is never populated by
    this reader; it is returned empty to match the shared reader interface.

    Recognized records:
      * ``assistant`` with a dict ``message``: ``stop_reason`` of ``end_turn``
        or ``stop_sequence`` closes the turn, ``tool_use`` opens it, and a
        missing ``stop_reason`` with list ``content`` counts as an in-flight
        (open) fragment. Any other ``stop_reason`` emits nothing.
      * ``user`` with a dict ``message``: the interrupt marker wins over a
        tool-result error when both are present.
      * ``system`` with ``subtype == "api_error"`` and ``level == "error"``.
    Every other record is ignored for the verdict.
    """
    facts: list[dict[str, Any]] = []
    diagnostics: list[dict[str, Any]] = []
    for entry in records:
        record_type = entry.get("type")
        body = entry.get("message")
        has_body = isinstance(body, dict)

        if record_type == "assistant" and has_body:
            stop_reason = body.get("stop_reason")
            turn_id = entry.get("requestId") or entry.get("uuid")
            if stop_reason is None:
                # No stop_reason yet: only a content list marks an open fragment.
                if isinstance(body.get("content"), list):
                    facts.append({"kind": common.TURN_OPEN, "turn_id": turn_id, "reason": "assistant_in_flight"})
            elif stop_reason == "tool_use":
                facts.append({"kind": common.TURN_OPEN, "turn_id": turn_id, "reason": "tool_use"})
            elif stop_reason == "end_turn":
                facts.append({"kind": common.TURN_COMPLETE, "turn_id": turn_id, "reason": "end_turn"})
            elif stop_reason == "stop_sequence":
                facts.append({"kind": common.TURN_COMPLETE, "turn_id": turn_id, "reason": "stop_sequence"})
            continue

        if record_type == "user" and has_body:
            content = body.get("content")
            if _content_has_interrupt(content):
                facts.append({"kind": common.INTERRUPTED, "turn_id": entry.get("uuid"), "reason": "user_interrupt"})
            elif _content_has_tool_error(content):
                facts.append({
                    "kind": common.ERROR,
                    # the turn being retried/affected, stable across records (C8 dedup)
                    "turn_id": entry.get("parentUuid") or entry.get("uuid"),
                    "reason": "tool_result_is_error",
                    "signature": "tool_result_is_error",
                    "raw": entry,
                })
            continue

        is_api_error = (
            record_type == "system"
            and entry.get("subtype") == "api_error"
            and entry.get("level") == "error"
        )
        if is_api_error:
            facts.append({
                "kind": common.ERROR,
                # api_error retries within a session dedup on (signature, session) (C8)
                "turn_id": entry.get("sessionId") or entry.get("parentUuid") or entry.get("uuid"),
                "reason": "api_error",
                "signature": "api_error",
                "raw": entry,
            })
        # anything else (metadata, snapshots, titles) does not affect the verdict
    return facts, diagnostics
65
+
66
+
67
def classify(session_log_text: str, *, process: Any = None) -> dict[str, Any]:
    """Classify a Claude transcript by running this module's ``extract_facts``
    through ``common.classify_with_reader``; ``process`` is passed along as-is.
    """
    verdict = common.classify_with_reader(extract_facts, session_log_text, process=process)
    return verdict
69
+
70
+
71
+ def _content_has_interrupt(content: Any) -> bool:
72
+ if not isinstance(content, list):
73
+ return False
74
+ for item in content:
75
+ if isinstance(item, dict) and item.get("type") == "text" and item.get("text") == _INTERRUPT_TEXT:
76
+ return True
77
+ return False
78
+
79
+
80
+ def _content_has_tool_error(content: Any) -> bool:
81
+ if not isinstance(content, list):
82
+ return False
83
+ for item in content:
84
+ if isinstance(item, dict) and item.get("type") == "tool_result" and item.get("is_error") is True:
85
+ return True
86
+ return False
@@ -0,0 +1,84 @@
1
+ """Codex rollout reader — the ONLY Codex-specific turn-state knowledge.
2
+
3
+ Translates Codex rollout JSONL (and app-server jsonrpc) records into normalized
4
+ lifecycle facts. Real markers (see turn-state-markers-evidence.md):
5
+ - event_msg payload.type == "task_started" -> open turn (working)
6
+ - event_msg payload.type == "task_complete" -> turn complete (idle)
7
+ - event_msg payload.type == "turn_aborted" reason=="interrupted" -> interrupted
8
+ App-server schema-derived markers:
9
+ - method "turn/completed" params.turn.status == "failed" -> failed/error
10
+ - method ".../requestApproval" -> approval block
11
+ Telemetry (token_count, agent_message, patch_apply_end, ...) is not a close.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from typing import Any
17
+
18
+ from team_agent.provider_state import common
19
+
20
+
21
def extract_facts(records: list[dict[str, Any]]) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Translate Codex rollout / app-server records into normalized facts.

    Returns ``(facts, diagnostics)``. ``diagnostics`` is never populated by
    this reader; it is returned empty to match the shared reader interface.

    ``event_msg`` records with a dict ``payload`` map ``task_started`` to an
    open turn, ``task_complete`` to a completed turn, and ``turn_aborted`` to
    an interrupted turn whose reason is the payload's ``reason`` (or
    ``"aborted"`` when that is empty). Other event payloads emit nothing.
    Records that are not such events but look like app-server JSON-RPC are
    delegated to ``_app_server_fact``.
    """
    facts: list[dict[str, Any]] = []
    diagnostics: list[dict[str, Any]] = []
    for entry in records:
        record_type = entry.get("type")
        raw_payload = entry.get("payload")
        payload = raw_payload if isinstance(raw_payload, dict) else None

        if record_type == "event_msg" and payload is not None:
            event = payload.get("type")
            turn_id = payload.get("turn_id")
            if event == "task_started":
                facts.append({"kind": common.TURN_OPEN, "turn_id": turn_id, "reason": "task_started"})
            elif event == "task_complete":
                facts.append({"kind": common.TURN_COMPLETE, "turn_id": turn_id, "reason": "task_complete"})
            elif event == "turn_aborted":
                # reason "interrupted" and every other abort reason yield the same fact shape
                abort_reason = str(payload.get("reason") or "aborted")
                facts.append({"kind": common.INTERRUPTED, "turn_id": turn_id, "reason": abort_reason})
            continue

        if not _is_app_server(entry):
            # response_item (assistant/user messages), token_count, etc. are not verdicts
            continue
        derived = _app_server_fact(entry)
        if derived is not None:
            facts.append(derived)
    return facts, diagnostics
44
+
45
+
46
def classify(session_log_text: str, *, process: Any = None) -> dict[str, Any]:
    """Classify a Codex rollout by running this module's ``extract_facts``
    through ``common.classify_with_reader``; ``process`` is passed along as-is.
    """
    verdict = common.classify_with_reader(extract_facts, session_log_text, process=process)
    return verdict
48
+
49
+
50
+ def _is_app_server(record: dict[str, Any]) -> bool:
51
+ return record.get("jsonrpc") == "2.0" and isinstance(record.get("method"), str)
52
+
53
+
54
+ def _app_server_fact(record: dict[str, Any]) -> dict[str, Any] | None:
55
+ method = str(record.get("method") or "")
56
+ params = record.get("params") if isinstance(record.get("params"), dict) else {}
57
+ if method == "turn/completed":
58
+ turn = params.get("turn") if isinstance(params.get("turn"), dict) else {}
59
+ status = turn.get("status")
60
+ turn_id = turn.get("id")
61
+ if status == "failed":
62
+ return {
63
+ "kind": common.FAILED,
64
+ "turn_id": turn_id,
65
+ "reason": "turn_failed",
66
+ "signature": "turn_failed",
67
+ "raw": record,
68
+ }
69
+ if status == "completed":
70
+ return {"kind": common.TURN_COMPLETE, "turn_id": turn_id, "reason": "completed"}
71
+ if status == "interrupted":
72
+ return {"kind": common.INTERRUPTED, "turn_id": turn_id, "reason": "interrupted"}
73
+ if status == "inProgress":
74
+ return {"kind": common.TURN_OPEN, "turn_id": turn_id, "reason": "in_progress"}
75
+ return None
76
+ if method.endswith("requestApproval"):
77
+ return {
78
+ "kind": common.APPROVAL,
79
+ "turn_id": params.get("turnId") or params.get("turn_id"),
80
+ "reason": "approval_required",
81
+ "signature": "approval_required",
82
+ "raw": record,
83
+ }
84
+ return None
@@ -0,0 +1,207 @@
1
+ """Shared, provider-neutral plumbing for the turn-state readers.
2
+
3
+ The per-provider readers (claude.py, codex.py) only translate their own record
4
+ shapes into a normalized list of lifecycle facts; everything else — JSONL
5
+ tail parsing, metadata filtering wiring, the verdict decision, and the
6
+ process-identity liveness guard — lives here so it is written once.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ from typing import Any, Callable
13
+
14
+ # Normalized lifecycle fact kinds emitted by every reader.
15
+ TURN_OPEN = "turn_open"
16
+ TURN_COMPLETE = "turn_complete"
17
+ INTERRUPTED = "interrupted"
18
+ FAILED = "failed"
19
+ APPROVAL = "approval"
20
+ ERROR = "error" # non-closing structured error (e.g. transient api retry / tool is_error)
21
+
22
+ _CLOSING = {TURN_COMPLETE, INTERRUPTED, FAILED}
23
+
24
+
25
def parse_jsonl(text: str) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Parse JSONL text into ``(records, parse_diagnostics)``.

    Blank lines are skipped. A line that is not valid JSON is reported as a
    ``json_decode_error`` diagnostic and a line that decodes to something
    other than an object as ``non_object_record``; neither raises. Each
    diagnostic carries the 1-based ``line`` number. The caller decides whether
    diagnostics with zero usable records mean ``unknown``.

    Records are delimited by ``\\n``, ``\\r\\n`` or ``\\r`` only.
    ``str.splitlines()`` is deliberately avoided: it also breaks on U+0085,
    U+2028 and U+2029, which may legally appear unescaped inside JSON strings,
    so a valid record containing one would be cut in half and misreported as
    decode errors.
    """
    records: list[dict[str, Any]] = []
    diagnostics: list[dict[str, Any]] = []
    # Normalize CRLF / CR to LF, then split on LF alone.
    lines = text.replace("\r\n", "\n").replace("\r", "\n").split("\n")
    for lineno, raw in enumerate(lines, start=1):
        line = raw.strip()
        if not line:
            continue
        try:
            obj = json.loads(line)
        except (ValueError, TypeError):
            diagnostics.append({"kind": "json_decode_error", "line": lineno})
            continue
        if not isinstance(obj, dict):
            diagnostics.append({"kind": "non_object_record", "line": lineno})
            continue
        records.append(obj)
    return records, diagnostics
48
+
49
+
50
def decide_state(
    facts: list[dict[str, Any]],
    *,
    process: Any = None,
    parse_diagnostics: list[dict[str, Any]] | None = None,
    had_records: bool,
    extra_diagnostics: list[dict[str, Any]] | None = None,
) -> dict[str, Any]:
    """Reduce a normalized fact stream to the public classify result.

    The verdict is taken from the LAST lifecycle fact (closing kinds, open
    turn, or approval), not from the last physical record; ``error`` facts do
    not take part. Parse and reader diagnostics are concatenated, in that
    order, into the result.

    With no lifecycle fact the state is ``unknown``; the reason distinguishes
    no records at all, records plus diagnostics, and records without any
    recognizable lifecycle marker (fail-safe, C5).

    A trailing open turn is reported as ``working`` only when
    ``process_liveness`` positively confirms the process (C4). A confirmed
    dead process yields ``abnormal`` / ``crashed_mid_turn``; an unverifiable
    one yields ``unknown``. File silence is not an input here (C14).
    """
    notes = [*(parse_diagnostics or []), *(extra_diagnostics or [])]

    verdict_kinds = _CLOSING | {TURN_OPEN, APPROVAL}
    final = None
    for fact in facts:
        if fact.get("kind") in verdict_kinds:
            final = fact

    if final is None:
        if not had_records:
            why = "unreadable_or_empty"
        elif notes:
            why = "unrecognized_format"
        else:
            why = "no_turn_lifecycle_fact"
        return _result("unknown", None, why, "session_file", [], notes)

    kind = final.get("kind")
    turn_id = final.get("turn_id")
    reason = str(final.get("reason") or kind)

    # kind -> (state, fallback reason, annotations) for every non-open verdict
    settled = {
        TURN_COMPLETE: ("idle", "turn_complete", []),
        INTERRUPTED: ("idle_interrupted", "interrupted", ["interrupted"]),
        FAILED: ("abnormal", "turn_failed", ["turn_failed"]),
        APPROVAL: ("blocked_on_human", "approval_required", ["awaiting_approval"]),
    }
    if kind in settled:
        state, fallback_reason, tags = settled[kind]
        return _result(state, turn_id, reason or fallback_reason, "session_file", tags, notes)

    # Remaining case: an open turn with no later close. "working" requires a
    # positive liveness confirmation; missing/partial identity is not enough.
    liveness, live_reason, live_note = process_liveness(process)
    if live_note:
        notes = notes + [live_note]
    if liveness == "alive":
        return _result("working", turn_id, "open_turn", "session_file", [], notes)
    if liveness == "dead":
        return _result(
            "abnormal", turn_id, "crashed_mid_turn", "process_guard",
            ["crashed_mid_turn", live_reason], notes,
        )
    # unverifiable: cannot confirm alive, so fail safe to unknown, never working
    return _result(
        "unknown", turn_id, "process_identity_unverified", "process_guard",
        ["process_identity_unverified", live_reason], notes,
    )
104
+
105
+
106
+ _STRONG_IDENTITY_FIELDS = ("start_time", "cmdline", "create_time")
107
+
108
+
109
def process_liveness(process: Any) -> tuple[str, str, dict[str, Any] | None]:
    """Three-valued process-identity liveness guard (Gap 32 C4).

    Returns ``(verdict, reason, diagnostic)`` where verdict is:
      * ``"alive"``        — an explicit positive flag, or a strong identity
                             field equal in both snapshots with none missing
      * ``"dead"``         — an explicit negative flag or an identity mismatch
      * ``"unverifiable"`` — identity missing or partial; callers must not
                             read this as working

    Accepted ``process`` shapes:
      * ``None`` / non-dict                          -> unverifiable
      * ``{"alive" | "running": bool}``              -> explicit
      * ``{"identity_match": bool}``                 -> explicit
      * ``{"recorded" | "expected": {...}, "current" | "observed": {...}}``

    Explicit flags are evaluated before the snapshots, and negative flags
    before positive ones. In the snapshot form the strong identity fields
    (start_time / cmdline / create_time) are compared: any shared field that
    differs means dead; otherwise at least one recorded field must be present
    and equal in the current snapshot, with no recorded field absent from it.
    """

    def flagged(snapshot: dict[str, Any], wanted: bool, *names: str) -> bool:
        # True when any of the named keys is exactly the boolean ``wanted``.
        return any(snapshot.get(name) is wanted for name in names)

    if not isinstance(process, dict):
        return "unverifiable", "process_identity_missing", {"kind": "process_identity_unverified"}

    if flagged(process, False, "alive", "running"):
        return "dead", "process_not_running", {"kind": "process_dead", "detail": "not_running"}
    if process.get("identity_match") is False:
        return "dead", "process_identity_mismatch", {"kind": "process_identity_mismatch"}
    if flagged(process, True, "alive", "running", "identity_match"):
        return "alive", "process_alive", None

    # Snapshot form: prefer "recorded"/"current", fall back to the aliases.
    recorded = process.get("recorded")
    if not isinstance(recorded, dict):
        recorded = process.get("expected")
    current = process.get("current")
    if not isinstance(current, dict):
        current = process.get("observed")
    if not isinstance(recorded, dict) or not isinstance(current, dict):
        return "unverifiable", "process_identity_partial", {"kind": "process_identity_unverified"}

    if flagged(current, False, "alive", "running"):
        return "dead", "process_not_running", {"kind": "process_dead", "detail": "current_not_running"}

    # A strong field present in both snapshots but different = replaced process.
    for field in _STRONG_IDENTITY_FIELDS:
        if field not in recorded or field not in current:
            continue
        if recorded.get(field) != current.get(field):
            mismatch = {
                "kind": "process_identity_mismatch",
                "field": field,
                "recorded": recorded.get(field),
                "current": current.get(field),
            }
            return "dead", f"process_identity_mismatch:{field}", mismatch

    # Positive confirmation: something matched and nothing recorded is missing.
    recorded_strong = [field for field in _STRONG_IDENTITY_FIELDS if field in recorded]
    confirmed: list[str] = []
    missing: list[str] = []
    for field in recorded_strong:
        if field not in current:
            missing.append(field)
        elif recorded.get(field) == current.get(field):
            confirmed.append(field)
    if confirmed and not missing:
        return "alive", "process_identity_match", None

    return "unverifiable", "process_identity_partial", {
        "kind": "process_identity_unverified",
        "recorded_strong": recorded_strong,
        "confirmed": confirmed,
        "missing": missing,
    }
164
+
165
+
166
def process_is_live(process: Any) -> tuple[bool, str, dict[str, Any] | None]:
    """Boolean view of ``process_liveness`` for conservative callers (e.g.
    whole-team-gone).

    Returns ``(live, reason, diagnostic)``. ``live`` is False only for a
    CONFIRMED dead verdict; an unverifiable process counts as live, so the
    team is never falsely declared gone.
    """
    outcome = process_liveness(process)
    verdict, reason, diagnostic = outcome
    is_live = verdict != "dead"
    return is_live, reason, diagnostic
172
+
173
+
174
+ def _result(
175
+ state: str,
176
+ turn_id: str | None,
177
+ reason: str,
178
+ source: str,
179
+ annotations: list[str],
180
+ diagnostics: list[dict[str, Any]],
181
+ ) -> dict[str, Any]:
182
+ return {
183
+ "state": state,
184
+ "turn_id": turn_id,
185
+ "reason": reason,
186
+ "source": source,
187
+ "annotations": list(annotations),
188
+ "diagnostics": list(diagnostics),
189
+ }
190
+
191
+
192
def classify_with_reader(
    extract_facts: Callable[[list[dict[str, Any]]], tuple[list[dict[str, Any]], list[dict[str, Any]]]],
    session_log_text: str,
    *,
    process: Any = None,
) -> dict[str, Any]:
    """Run a provider reader's fact extractor through the shared pipeline.

    The text (an empty string when falsy) is parsed with ``parse_jsonl``, the
    parsed records are handed to ``extract_facts``, and the resulting facts
    plus both diagnostics lists are reduced by ``decide_state``.
    ``had_records`` reports whether at least one record parsed.
    """
    parsed_records, parse_notes = parse_jsonl(session_log_text or "")
    lifecycle_facts, reader_notes = extract_facts(parsed_records)
    any_records = bool(parsed_records)
    return decide_state(
        lifecycle_facts,
        process=process,
        had_records=any_records,
        parse_diagnostics=parse_notes,
        extra_diagnostics=reader_notes,
    )
+ )