PyPI - baserun-cli - Versions diffs - 0.1.3__tar.gz → 0.1.4__tar.gz - Mend

baserun-cli 0.1.3.tar.gz → 0.1.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

{baserun_cli-0.1.3 → baserun_cli-0.1.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: baserun-cli
-Version: 0.1.3
+Version: 0.1.4
 Summary: BaseRun agent-side daemon (connects to nchan, spawns CLI agents, publishes run events)
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown

{baserun_cli-0.1.3 → baserun_cli-0.1.4}/baserun_cli/channel.py RENAMED Viewed

@@ -82,73 +82,49 @@ class ChannelClient:
         }
     async def ensure_run_pub(self, run_id: str) -> None:
-        """Pre-connect the WS publisher for a run (fire-and-forget).
+        """Deprecated no-op.
-        Called at run start so the WS handshake completes before the first
-        event arrives. If this fails, publish_event will retry on demand.
+        WS publisher sends can succeed locally while nchan receives nothing
+        when a proxy/WebSocket keepalive breaks. Run events now use HTTP
+        publish as the authoritative, acknowledged path.
         """
-        if run_id in self._run_pubs:
-            return
-        try:
-            ws = await self._open_run_pub(run_id)
-            self._run_pubs[run_id] = ws
-        except Exception as e:
-            log.debug("pre-connect ws pub for run %s failed (will retry on first event): %s", run_id, e)
+        return
     async def publish_event(self, run_id: str, payload: dict[str, Any]) -> bool:
-        """Publish a run event via WebSocket (guarantees ordering within a run).
-        All events (including terminal) go through the same WS connection to
-        preserve event order in the nchan channel buffer. Mixing WS and HTTP
-        can cause ordering issues because HTTP requests may be processed by
-        nchan at different times relative to buffered WS messages.
+        """Publish a run event via HTTP and require an acknowledged response.
-        Returns True if published successfully.
+        We intentionally avoid the WebSocket publisher for run events: send()
+        only confirms that bytes entered the local socket buffer, not that nchan
+        accepted and buffered the event. HTTP gives per-event success/failure.
         """
-        data = json.dumps(payload, ensure_ascii=False)
-        is_terminal = (payload.get("data", {}).get("finished") is True)
-        max_retries = 4 if is_terminal else 2
-        for attempt in range(max_retries):
-            ws = self._run_pubs.get(run_id)
-            if ws is None:
-                try:
-                    ws = await self._open_run_pub(run_id)
-                    self._run_pubs[run_id] = ws
-                except Exception as e:
-                    log.warning("ws pub connect for run %s failed (attempt %d): %s", run_id, attempt + 1, e)
-                    if attempt < max_retries - 1:
-                        await asyncio.sleep(1.0)
-                    continue
-            try:
-                await ws.send(data)
-                return True
-            except Exception as e:
-                log.warning("ws publish to run %s failed (attempt %d): %s", run_id, attempt + 1, e)
-                self._run_pubs.pop(run_id, None)
-                if attempt < max_retries - 1:
-                    await asyncio.sleep(1.0)
-        log.error("publish failed for run %s seq=%s after %d attempts",
-                   run_id, payload.get("data", {}).get("seq"), max_retries)
-        return False
-    async def publish_event_reliably(self, run_id: str, payload: dict[str, Any]) -> bool:
-        """Publish a critical run event, falling back to HTTP if WS fails."""
-        if await self.publish_event(run_id, payload):
-            return True
         try:
             await self._http_publish(run_id, payload)
             return True
         except Exception as e:
-            log.error(
-                "http publish fallback failed for run %s seq=%s: %s",
+            log.warning(
+                "http publish to run %s failed seq=%s: %s",
                 run_id,
                 payload.get("data", {}).get("seq"),
                 e,
             )
             return False
+    async def publish_event_reliably(self, run_id: str, payload: dict[str, Any]) -> bool:
+        """Publish a critical run event with HTTP retries."""
+        max_retries = 5
+        for attempt in range(max_retries):
+            if await self.publish_event(run_id, payload):
+                return True
+            if attempt < max_retries - 1:
+                await asyncio.sleep(1.0)
+        log.error(
+            "reliable publish failed for run %s seq=%s after %d attempts",
+            run_id,
+            payload.get("data", {}).get("seq"),
+            max_retries,
+        )
+        return False
     async def _open_run_pub(self, run_id: str) -> Any:
         """Open a WS publisher connection for run:{run_id}."""
         base = self.nchan_url
@@ -175,83 +151,81 @@ class ChannelClient:
             except Exception:
                 pass
-    async def verify_and_replay(self, run_id: str, local_events: list[dict]) -> None:
-        """对比 nchan channel 实际状态与本地 events，补发缺失的事件。
-        1. 订阅 nchan channel，读取已有消息的 seq 集合
-        2. 对比本地 JSONL 的 seq 集合
-        3. 缺失的事件按 seq 顺序通过 WS 重新发送
+    async def verify_and_replay(self, run_id: str, local_events: list[dict]) -> bool:
+        """对比 nchan channel 实际状态与本地 events，补发缺失事件。
-        这比盲目重发 failed_events 更可靠——能捕获 ws.send() 返回成功
-        但数据实际未到达 nchan 的情况（TCP buffer 问题）。
+        返回 True 表示已确认 nchan 中存在 terminal；返回 False 表示仍未确认，
+        runner 不能把该 run 记为 recently-completed。
         """
         if not local_events:
-            return
+            return False
-        # 读取 nchan channel 当前状态
-        url = f"{self._publish_base.rstrip('/')}/internal/run/{run_id}"
-        nchan_seqs: set[int] = set()
-        has_terminal = False
-        try:
-            async with httpx.AsyncClient(timeout=httpx.Timeout(10.0, connect=5.0)) as c:
-                async with c.stream("GET", url, headers={"Accept": "text/event-stream"}) as resp:
-                    async for line in resp.aiter_lines():
-                        if not line.startswith("data:"):
-                            continue
-                        raw = line[5:].strip()
-                        if not raw:
-                            continue
-                        try:
-                            msg = json.loads(raw)
-                        except json.JSONDecodeError:
-                            continue
-                        data = msg.get("data") if isinstance(msg, dict) else msg
-                        if not isinstance(data, dict):
-                            continue
-                        seq = data.get("seq", 0)
-                        if seq:
-                            nchan_seqs.add(seq)
-                        if data.get("finished") and data.get("kind") in ("final", "error"):
-                            has_terminal = True
-                            break  # terminal found = all events present
-        except Exception as e:
-            log.warning("verify: failed to read nchan for run %s: %s", run_id, e)
-            # nchan 读取失败，保守重发所有事件
-            nchan_seqs = set()
+        async def read_nchan() -> tuple[set[int], bool, bool]:
+            """Return (seqs, has_terminal, read_ok)."""
+            url = f"{self._publish_base.rstrip('/')}/internal/run/{run_id}"
+            seqs: set[int] = set()
+            terminal = False
+            try:
+                async with httpx.AsyncClient(timeout=httpx.Timeout(10.0, connect=5.0)) as c:
+                    async with c.stream("GET", url, headers={"Accept": "text/event-stream"}) as resp:
+                        resp.raise_for_status()
+                        async for line in resp.aiter_lines():
+                            if not line.startswith("data:"):
+                                continue
+                            raw = line[5:].strip()
+                            if not raw:
+                                continue
+                            try:
+                                msg = json.loads(raw)
+                            except json.JSONDecodeError:
+                                continue
+                            data = msg.get("data") if isinstance(msg, dict) else msg
+                            if not isinstance(data, dict):
+                                continue
+                            seq = data.get("seq", 0)
+                            if seq:
+                                seqs.add(seq)
+                            if data.get("finished") and data.get("kind") in ("final", "error"):
+                                terminal = True
+                                break
+                return seqs, terminal, True
+            except Exception as e:
+                log.warning("verify: failed to read nchan for run %s: %s", run_id, e)
+                return set(), False, False
-        # terminal 已在 nchan 中 = 全部到齐
+        nchan_seqs, has_terminal, read_ok = await read_nchan()
         if has_terminal:
             log.info("verify: run %s terminal present in nchan, all good", run_id)
-            return
+            return True
-        # 找出缺失的事件
         local_seqs = {ev.get("data", {}).get("seq", 0) for ev in local_events}
         missing_seqs = local_seqs - nchan_seqs
-        if not missing_seqs and has_terminal:
-            return
         if not missing_seqs:
-            # nchan 有所有非 terminal 事件但缺 terminal → 重发 terminal
-            missing_seqs = {ev.get("data", {}).get("seq", 0) for ev in local_events
-                           if ev.get("data", {}).get("finished") is True} - nchan_seqs
+            missing_seqs = {
+                ev.get("data", {}).get("seq", 0)
+                for ev in local_events
+                if ev.get("data", {}).get("finished") is True
+            } - nchan_seqs
+        if not missing_seqs and read_ok:
+            log.error("verify: run %s has all seqs but no terminal", run_id)
+            return False
         if not missing_seqs:
-            log.info("verify: run %s no missing events", run_id)
-            return
+            # 无法读取 nchan，保守重放所有事件。
+            missing_seqs = local_seqs
-        # 按 seq 排序重发
         missing_events = sorted(
             [ev for ev in local_events if ev.get("data", {}).get("seq", 0) in missing_seqs],
             key=lambda ev: ev.get("data", {}).get("seq", 0),
         )
+        seq_preview = [ev["data"]["seq"] for ev in missing_events[:50]]
+        suffix = "..." if len(missing_events) > 50 else ""
         log.info(
-            "verify: run %s replaying %d missing events (seqs: %s, nchan has %d/%d)",
-            run_id, len(missing_events),
-            [ev["data"]["seq"] for ev in missing_events],
-            len(nchan_seqs), len(local_seqs),
+            "verify: run %s replaying %d missing events (seqs: %s%s, nchan has %d/%d)",
+            run_id, len(missing_events), seq_preview, suffix, len(nchan_seqs), len(local_seqs),
         )
+        replay_ok = True
         for ev in missing_events:
             data = ev.get("data", {})
             if data.get("finished") is True:
@@ -259,8 +233,22 @@ class ChannelClient:
             else:
                 ok = await self.publish_event(run_id, ev)
             if not ok:
+                replay_ok = False
                 log.error("verify: replay still failed for run %s seq=%s", run_id, data.get("seq"))
+        if not replay_ok:
+            return False
+        nchan_seqs, has_terminal, read_ok = await read_nchan()
+        if has_terminal:
+            log.info("verify: run %s terminal present after replay, all good", run_id)
+            return True
+        log.error(
+            "verify: run %s terminal still missing after replay (read_ok=%s, nchan has %d/%d)",
+            run_id, read_ok, len(nchan_seqs), len(local_seqs),
+        )
+        return False
     async def run(self) -> None:
         """Main loop: WS subscriber + HTTP claim-task poller, running in parallel.

{baserun_cli-0.1.3 → baserun_cli-0.1.4}/baserun_cli/runner.py RENAMED Viewed

@@ -165,10 +165,6 @@ class TaskRunner:
         events_iter = self._select_events(connector, mode, prompt, agent_session_id)
-        # pre-connect WS publisher (don't wait for first event — avoids 10s+ delay
-        # where early events accumulate and arrive as a burst, killing streaming UX)
-        asyncio.create_task(self.channel.ensure_run_pub(run_id))
         # local persistence: raw event log for debugging + retry
         log_dir = os.path.join(os.path.expanduser("~"), ".lark-agent-hub-logs")
         os.makedirs(log_dir, exist_ok=True)
@@ -242,8 +238,13 @@ class TaskRunner:
                 except Exception:
                     pass
+                verified = False
                 if all_local:
-                    await self.channel.verify_and_replay(run_id, all_local)
+                    verified = await self.channel.verify_and_replay(run_id, all_local)
+                if not verified:
+                    log.error("run %s finished locally but terminal was not confirmed in nchan", run_id)
+                    return False, terminal_status, seq
                 log.info("run %s completed (%d events, log: %s)", run_id, seq, log_path)
             return True, terminal_status, seq
@@ -267,8 +268,8 @@ class TaskRunner:
                     log_file.write(json.dumps(error_payload["data"], ensure_ascii=False) + "\n")
             except Exception:
                 pass
-            await self.channel.publish_event_reliably(run_id, error_payload)
-            return True, "error", seq
+            ok = await self.channel.publish_event_reliably(run_id, error_payload)
+            return ok, "error", seq
         finally:
             await self.channel.close_run_pub(run_id)

{baserun_cli-0.1.3 → baserun_cli-0.1.4}/baserun_cli.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: baserun-cli
-Version: 0.1.3
+Version: 0.1.4
 Summary: BaseRun agent-side daemon (connects to nchan, spawns CLI agents, publishes run events)
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown

{baserun_cli-0.1.3 → baserun_cli-0.1.4}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "baserun-cli"
-version = "0.1.3"
+version = "0.1.4"
 description = "BaseRun agent-side daemon (connects to nchan, spawns CLI agents, publishes run events)"
 readme = "README.md"
 requires-python = ">=3.11"