forgexa-cli 1.7.2__tar.gz → 1.7.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: forgexa-cli
3
- Version: 1.7.2
3
+ Version: 1.7.6
4
4
  Summary: Forgexa CLI — command-line client and AI agent runtime for the Forgexa platform
5
5
  Author-email: Jason Sun <dev.winds@gmail.com>
6
6
  License: MIT
@@ -1,2 +1,2 @@
1
1
  """forgexa-cli — Forgexa command-line client."""
2
- __version__ = "1.7.2"
2
+ __version__ = "1.7.6"
@@ -29,6 +29,7 @@ import base64
29
29
  import hashlib
30
30
  import json
31
31
  import logging
32
+ from logging.handlers import RotatingFileHandler
32
33
  import os
33
34
  import platform
34
35
  import re
@@ -301,6 +302,16 @@ except (ImportError, ModuleNotFoundError):
301
302
  def AGENT_MAX_OUTPUT_SIZE(self) -> int:
302
303
  return int(os.environ.get("AGENT_MAX_OUTPUT_SIZE", "100000"))
303
304
 
305
+ @property
306
+ def FACTORY_CODEX_SANDBOX(self) -> str:
307
+ """Codex sandbox mode: 'bypass' (default, safe) or 'bwrap' (Linux only).
308
+
309
+ 'bypass' uses --dangerously-bypass-approvals-and-sandbox which works
310
+ in all environments including Docker without CAP_NET_ADMIN.
311
+ 'bwrap' uses --full-auto (bubblewrap) which requires CAP_NET_ADMIN.
312
+ """
313
+ return os.environ.get("FACTORY_CODEX_SANDBOX", "bypass").strip().lower()
314
+
304
315
  def get_daemon_workspaces_root(self) -> str:
305
316
  root = self.DAEMON_WORKSPACES_ROOT
306
317
  if not root:
@@ -321,7 +332,7 @@ except (ImportError, ModuleNotFoundError):
321
332
  # DAEMON_VERSION is the protocol/logic version of the daemon code.
322
333
  # Kept in sync with pyproject.toml version via bump-version.sh.
323
334
  # CLIENT_TYPE identifies which packaging/distribution this daemon runs in.
324
- DAEMON_VERSION = "1.7.2"
335
+ DAEMON_VERSION = "1.7.6"
325
336
 
326
337
 
327
338
  def _detect_client_type() -> str:
@@ -358,7 +369,11 @@ _log_dir.mkdir(parents=True, exist_ok=True)
358
369
  DAEMON_LOG_PATH = _log_dir / "daemon.log"
359
370
 
360
371
  _log_handlers: list[logging.Handler] = [
361
- logging.FileHandler(DAEMON_LOG_PATH, mode="a", encoding="utf-8"),
372
+ RotatingFileHandler(
373
+ DAEMON_LOG_PATH, mode="a", encoding="utf-8",
374
+ maxBytes=50 * 1024 * 1024, # 50 MB per file
375
+ backupCount=5,
376
+ ),
362
377
  ]
363
378
  if sys.stderr.isatty():
364
379
  _log_handlers.append(logging.StreamHandler(sys.stderr))
@@ -713,6 +728,9 @@ class AgentDiscovery:
713
728
 
714
729
  async def discover(self) -> list[DiscoveredAgent]:
715
730
  self._expand_path()
731
+ # Probe bwrap support once at discovery time and log a clear warning
732
+ # if it is broken. This surfaces the error early rather than mid-task.
733
+ await self._probe_bwrap_support()
716
734
  available = []
717
735
  for agent_id, spec in self.AGENT_REGISTRY.items():
718
736
  custom_path = os.environ.get(spec.get("env_path_override", ""))
@@ -732,8 +750,9 @@ class AgentDiscovery:
732
750
 
733
751
  async def _get_version(self, detect_cmd: str) -> str:
734
752
  try:
735
- proc = await asyncio.create_subprocess_shell(
736
- detect_cmd,
753
+ parts = detect_cmd.split()
754
+ proc = await asyncio.create_subprocess_exec(
755
+ *parts,
737
756
  stdout=asyncio.subprocess.PIPE,
738
757
  stderr=asyncio.subprocess.PIPE,
739
758
  )
@@ -742,8 +761,60 @@ class AgentDiscovery:
742
761
  except Exception:
743
762
  return "unknown"
744
763
 
764
+ @staticmethod
765
+ async def _probe_bwrap_support() -> None:
766
+ """Probe whether bubblewrap (bwrap) works in this environment.
745
767
 
746
- # ── Workspace Manager ──
768
+ codex exec --full-auto internally creates a bubblewrap sandbox that
769
+ requires a network namespace (CAP_NET_ADMIN). Inside Docker containers
770
+ or other restricted Linux environments this fails immediately with:
771
+ bwrap: loopback: Failed RTM_NEWADDR: Operation not permitted
772
+
773
+ We probe at startup so the operator gets an actionable warning rather
774
+ than a cryptic mid-task failure. The probe is skipped on macOS/Windows
775
+ because Codex uses a different sandbox mechanism on those platforms.
776
+ """
777
+ if sys.platform != "linux":
778
+ return
779
+ sandbox_mode = os.environ.get("FACTORY_CODEX_SANDBOX", "bypass").strip().lower()
780
+ if sandbox_mode != "bwrap":
781
+ # Default mode bypasses sandbox — no bwrap needed, skip probe.
782
+ return
783
+ bwrap_bin = shutil.which("bwrap")
784
+ if not bwrap_bin:
785
+ logger.warning(
786
+ "FACTORY_CODEX_SANDBOX=bwrap but bwrap binary not found. "
787
+ "Codex sandbox will fail. Either install bwrap or unset "
788
+ "FACTORY_CODEX_SANDBOX to use bypass mode (default)."
789
+ )
790
+ return
791
+ try:
792
+ proc = await asyncio.create_subprocess_exec(
793
+ bwrap_bin,
794
+ "--dev", "/dev",
795
+ "--proc", "/proc",
796
+ "--ro-bind", "/usr", "/usr",
797
+ "--unshare-net",
798
+ "true",
799
+ stdout=asyncio.subprocess.DEVNULL,
800
+ stderr=asyncio.subprocess.PIPE,
801
+ )
802
+ _, stderr = await asyncio.wait_for(proc.communicate(), timeout=5)
803
+ if proc.returncode != 0:
804
+ err = (stderr or b"").decode(errors="replace").strip()
805
+ logger.warning(
806
+ "bwrap probe failed (exit=%d): %s. "
807
+ "codex exec --full-auto will fail in this environment. "
808
+ "Unset FACTORY_CODEX_SANDBOX to use bypass mode (default), "
809
+ "or grant CAP_NET_ADMIN / run privileged.",
810
+ proc.returncode, err,
811
+ )
812
+ else:
813
+ logger.info("bwrap probe: network namespaces work in this environment")
814
+ except asyncio.TimeoutError:
815
+ logger.warning("bwrap probe timed out — treating as unsupported")
816
+ except Exception as exc:
817
+ logger.warning("bwrap probe error: %s", exc)
747
818
 
748
819
 
749
820
  class WorkspaceManager:
@@ -986,6 +1057,71 @@ class WorkspaceManager:
986
1057
  # Remove the broken worktree directory
987
1058
  shutil.rmtree(ws_path, ignore_errors=True)
988
1059
 
1060
+ async def _detect_unrelated_histories(self, repo_path: Path, project_key: str) -> bool:
1061
+ """Detect whether local clone has diverged from remote due to history rewrite.
1062
+
1063
+ When a remote repo is rewritten (e.g. via BFG or git filter-repo to
1064
+ remove large files), all commit SHAs change. The local clone retains
1065
+ the old SHAs in its object store, making fetch/reset/merge fail in
1066
+ cryptic ways.
1067
+
1068
+ Strategy: ask git whether the local HEAD commit object is reachable in
1069
+ the remote graph. We use `git ls-remote` to get the remote HEAD SHA,
1070
+ then check if that SHA exists locally. If the remote HEAD does NOT
1071
+ exist locally, histories are definitely unrelated.
1072
+
1073
+ Additionally, if the repo has a shallow marker but the remote default
1074
+ branch has diverged past the shallow grafts, `git fetch` itself will
1075
+ indicate problems.
1076
+ """
1077
+ try:
1078
+ # Get the local HEAD SHA
1079
+ local_proc = await asyncio.create_subprocess_exec(
1080
+ "git", "rev-parse", "HEAD",
1081
+ cwd=str(repo_path),
1082
+ stdout=asyncio.subprocess.PIPE,
1083
+ stderr=asyncio.subprocess.PIPE,
1084
+ )
1085
+ local_out, _ = await asyncio.wait_for(local_proc.communicate(), timeout=10)
1086
+ if local_proc.returncode != 0:
1087
+ return False
1088
+ local_head = local_out.decode().strip()
1089
+ if not local_head:
1090
+ return False
1091
+
1092
+ # Get the remote HEAD SHA via ls-remote (no network for local check)
1093
+ # Try to see if the remote HEAD is in local object store
1094
+ # If git cat-file -e <remote_sha> succeeds, remote HEAD is known locally
1095
+ # (histories still share commits). Otherwise, fully diverged.
1096
+ #
1097
+ # However, after a history rewrite the remote HEAD is a brand-new SHA,
1098
+ # and the local object store only has old SHAs. So we check the other
1099
+ # direction: does the local HEAD exist on the remote at all?
1100
+ # We use `git branch -r --contains <local_head>` which lists remote
1101
+ # tracking branches that contain that commit. If none, it's unrelated.
1102
+ check_proc = await asyncio.create_subprocess_exec(
1103
+ "git", "branch", "-r", "--contains", local_head,
1104
+ cwd=str(repo_path),
1105
+ stdout=asyncio.subprocess.PIPE,
1106
+ stderr=asyncio.subprocess.PIPE,
1107
+ )
1108
+ out, _ = await asyncio.wait_for(check_proc.communicate(), timeout=10)
1109
+ if check_proc.returncode != 0:
1110
+ # Command failed (e.g. invalid object) — history is broken
1111
+ return True
1112
+ remote_branches = out.decode().strip()
1113
+ if not remote_branches:
1114
+ # Local HEAD is not reachable from any remote branch — unrelated
1115
+ logger.info(
1116
+ "Local HEAD %s not found in any remote branch at %s — "
1117
+ "histories appear unrelated (remote may have been rewritten).",
1118
+ local_head[:12], repo_path,
1119
+ )
1120
+ return True
1121
+ except Exception:
1122
+ pass
1123
+ return False
1124
+
989
1125
  async def _create_worktree(
990
1126
  self, project_dir: Path, repo_url: str, default_branch: str,
991
1127
  workspace_key: str, branch_name: str, *, fresh_start: bool = False,
@@ -1143,6 +1279,25 @@ class WorkspaceManager:
1143
1279
  )
1144
1280
  if not sync_success:
1145
1281
  if expect_branch:
1282
+ # Before giving up, check for history-rewrite: if the remote
1283
+ # history was rewritten (all SHAs changed), local objects are
1284
+ # stale and no amount of retries will fix sync. Detect this
1285
+ # and destroy the workspace + _main so they get recloned.
1286
+ is_unrelated = await self._detect_unrelated_histories(ws_path, project_key)
1287
+ if is_unrelated:
1288
+ logger.warning(
1289
+ "Detected repository history mismatch for worktree %s "
1290
+ "(remote history likely rewritten). Discarding stale "
1291
+ "worktree and _main clone for a full re-clone on retry.",
1292
+ ws_path,
1293
+ )
1294
+ await self._remove_broken_worktree(main_repo, ws_path, workspace_key)
1295
+ shutil.rmtree(main_repo, ignore_errors=True)
1296
+ raise RuntimeError(
1297
+ f"Repository history was rewritten (e.g. large-file cleanup). "
1298
+ f"Stale local clone discarded. "
1299
+ f"The task will be retried with a fresh clone."
1300
+ )
1146
1301
  raise RuntimeError(
1147
1302
  f"Failed to sync branch '{branch_name}' from remote after 3 attempts. "
1148
1303
  f"The branch should exist (pushed by prior analysis/design phase). "
@@ -1163,7 +1318,36 @@ class WorkspaceManager:
1163
1318
  repo_url, str(main_repo), timeout=settings.GIT_CLONE_TIMEOUT, project_key=project_key,
1164
1319
  )
1165
1320
  else:
1166
- await self._git("fetch", "--all", cwd=main_repo, timeout=300, project_key=project_key)
1321
+ # Use targeted fetch instead of --all to avoid pulling every branch/tag
1322
+ # from potentially large repos (avoids 300s timeout on big repos).
1323
+ # Fetch default branch only; the feature branch is explicitly fetched below.
1324
+ try:
1325
+ await self._git(
1326
+ "fetch", "origin", default_branch,
1327
+ cwd=main_repo, timeout=settings.GIT_CLONE_TIMEOUT, project_key=project_key,
1328
+ )
1329
+ except RuntimeError as _fetch_err:
1330
+ err_str = str(_fetch_err)
1331
+ # Detect "unrelated histories" / history-rewrite scenarios:
1332
+ # If the remote history was rewritten (e.g. BFG large-file removal),
1333
+ # all commit SHAs change. The local clone becomes incompatible —
1334
+ # fetch may succeed but the local refs are orphaned and unusable.
1335
+ # Detection: check whether local HEAD exists in the remote graph.
1336
+ is_unrelated = await self._detect_unrelated_histories(main_repo, project_key)
1337
+ if is_unrelated or "not our ref" in err_str or "shallow" in err_str:
1338
+ logger.warning(
1339
+ "Detected repository history mismatch for %s (remote history likely "
1340
+ "rewritten). Discarding stale local clone and re-cloning from scratch.",
1341
+ main_repo,
1342
+ )
1343
+ shutil.rmtree(main_repo, ignore_errors=True)
1344
+ await self._git(
1345
+ "clone", "--single-branch", "--no-tags",
1346
+ repo_url, str(main_repo), timeout=settings.GIT_CLONE_TIMEOUT,
1347
+ project_key=project_key,
1348
+ )
1349
+ else:
1350
+ raise
1167
1351
 
1168
1352
  # --single-branch clone only fetches the default branch.
1169
1353
  # Explicitly fetch the feature branch so origin/{branch_name}
@@ -1477,7 +1661,12 @@ class ProcessManager:
1477
1661
  "name or service not known",
1478
1662
  "no such host",
1479
1663
  "network is unreachable",
1480
- "api error",
1664
+ # "api error" removed: too broad — matches agent-generated code/output
1665
+ # discussing API errors. Real API transport errors are covered by the
1666
+ # connection patterns above (refused, reset, timed out, etc.).
1667
+ "apiexception:",
1668
+ "api error: 5", # 5xx errors like "API error: 503", "API error: 502"
1669
+ "api error: connection",
1481
1670
  ]
1482
1671
 
1483
1672
  def __init__(self):
@@ -1932,7 +2121,30 @@ class ProcessManager:
1932
2121
  timeout=timeout,
1933
2122
  )
1934
2123
  except asyncio.TimeoutError:
1935
- proc.kill()
2124
+ # Kill the entire process group so that child processes (npm, yarn,
2125
+ # ssh, git, etc.) spawned by the agent are also terminated. A plain
2126
+ # proc.kill() only kills the direct subprocess; any grandchildren
2127
+ # become orphaned, keep pipes open, and can exhaust system resources.
2128
+ try:
2129
+ if sys.platform != "win32":
2130
+ import signal as _signal
2131
+ try:
2132
+ os.killpg(os.getpgid(proc.pid), _signal.SIGKILL)
2133
+ except (ProcessLookupError, PermissionError, OSError):
2134
+ pass
2135
+ else:
2136
+ import subprocess as _subprocess
2137
+ _subprocess.run(
2138
+ ["taskkill", "/F", "/T", "/PID", str(proc.pid)],
2139
+ capture_output=True,
2140
+ )
2141
+ except Exception:
2142
+ pass
2143
+ finally:
2144
+ try:
2145
+ proc.kill()
2146
+ except Exception:
2147
+ pass
1936
2148
  # Drain any remaining output after kill
1937
2149
  try:
1938
2150
  remaining, _ = await asyncio.wait_for(proc.communicate(), timeout=5)
@@ -1999,6 +2211,7 @@ class ProcessManager:
1999
2211
  cwd=str(cwd),
2000
2212
  env=env,
2001
2213
  limit=100 * 1024 * 1024, # 100MB line buffer for large JSON output from long sessions
2214
+ start_new_session=True, # own process group → killpg on timeout kills all children
2002
2215
  )
2003
2216
  self.active_processes[task_id] = proc
2004
2217
  stdout, stderr, returncode = await self._stream_process(
@@ -2068,9 +2281,57 @@ class ProcessManager:
2068
2281
  self, agent: DiscoveredAgent, prompt: str, cwd: Path, timeout: int, task_id: str,
2069
2282
  on_chunk: Any = None,
2070
2283
  ) -> TaskResult:
2071
- """Run Codex CLI in exec mode (non-interactive)."""
2072
- cmd = [agent.command, "exec", "--full-auto", "--json", "-"]
2284
+ """Run Codex CLI in exec mode (non-interactive).
2285
+
2286
+ Sandbox mode selection (FACTORY_CODEX_SANDBOX env var):
2287
+ - "bypass" (default): --dangerously-bypass-approvals-and-sandbox
2288
+ Safe for daemon context: the daemon already runs on a controlled
2289
+ machine and the workspace path is pre-scoped to the project.
2290
+ Required when running inside Docker or any environment that lacks
2291
+ CAP_NET_ADMIN, because codex --full-auto internally uses bubblewrap
2292
+ (bwrap) which tries to set up a loopback network interface and fails
2293
+ with "bwrap: loopback: Failed RTM_NEWADDR: Operation not permitted".
2294
+ - "bwrap": --full-auto (uses bubblewrap Linux sandbox). Only works
2295
+ when bwrap can create user+network namespaces (bare-metal Linux,
2296
+ not inside most Docker containers).
2297
+ """
2298
+ sandbox_mode = os.environ.get("FACTORY_CODEX_SANDBOX", "bypass").strip().lower()
2299
+ if sandbox_mode == "bwrap":
2300
+ sandbox_flag = "--full-auto"
2301
+ else:
2302
+ # Default: bypass sandbox entirely — no bwrap, no approval prompts.
2303
+ # Equivalent to Kimi's --yolo and OpenCode's --dangerously-skip-permissions.
2304
+ sandbox_flag = "--dangerously-bypass-approvals-and-sandbox"
2305
+
2306
+ cmd = [agent.command, "exec", sandbox_flag, "--json", "-"]
2073
2307
  result = await self._run_cli(cmd, cwd, timeout, task_id, stdin_input=prompt, on_chunk=on_chunk)
2308
+
2309
+ # Detect the bwrap loopback error and surface a clear, actionable message.
2310
+ # This happens when FACTORY_CODEX_SANDBOX=bwrap (or any future codex version
2311
+ # that enables bwrap by default) is used inside Docker/container environments
2312
+ # that lack CAP_NET_ADMIN.
2313
+ if result.status == "failed" and "RTM_NEWADDR" in (result.stderr or ""):
2314
+ logger.error(
2315
+ "Codex sandbox (bwrap) failed for task %s with network namespace error. "
2316
+ "Set FACTORY_CODEX_SANDBOX=bypass (default) to disable bwrap sandboxing. "
2317
+ "Original error: %s",
2318
+ task_id, (result.stderr or "").strip()[:500],
2319
+ )
2320
+ result = TaskResult(
2321
+ status="failed",
2322
+ exit_code=result.exit_code,
2323
+ stdout=result.stdout,
2324
+ stderr=result.stderr,
2325
+ error=(
2326
+ "codex_sandbox_error: bubblewrap (bwrap) failed to create a network "
2327
+ "namespace (RTM_NEWADDR: Operation not permitted). This environment "
2328
+ "does not support bwrap sandboxing (e.g. Docker without CAP_NET_ADMIN). "
2329
+ "Fix: set FACTORY_CODEX_SANDBOX=bypass in the daemon environment "
2330
+ "(this is already the default — check that no override is set)."
2331
+ ),
2332
+ metrics=result.metrics,
2333
+ )
2334
+
2074
2335
  parsed_metrics = self._parse_agent_jsonl_output(result.stdout)
2075
2336
  result.metrics.update(parsed_metrics)
2076
2337
  return result
@@ -2079,14 +2340,25 @@ class ProcessManager:
2079
2340
  self, agent: DiscoveredAgent, prompt: str, cwd: Path, timeout: int, task_id: str,
2080
2341
  on_chunk: Any = None,
2081
2342
  ) -> TaskResult:
2082
- """Run OpenCode CLI in non-interactive mode."""
2343
+ """Run OpenCode CLI in non-interactive mode.
2344
+
2345
+ Uses `opencode run --format json --dir <cwd>` for headless execution.
2346
+ The message is passed as a positional argument.
2347
+ NOTE: `--dir` is the correct flag (not `--cwd` which is invalid).
2348
+ """
2083
2349
  cmd = [
2084
2350
  agent.command, "run",
2085
2351
  "--format", "json",
2086
2352
  "--dangerously-skip-permissions",
2087
- "--cwd", str(cwd),
2088
- prompt,
2353
+ "--dir", str(cwd),
2089
2354
  ]
2355
+ # Apply model override if configured (e.g. FACTORY_OPENCODE_MODEL=copilot/gpt-4.1)
2356
+ model_override = os.environ.get("FACTORY_OPENCODE_MODEL")
2357
+ if model_override:
2358
+ cmd += ["--model", model_override]
2359
+ # -- ensures yargs treats everything after it as positional args, not flags.
2360
+ # Without this, prompts containing --flag-like text cause yargs to print help and exit 1.
2361
+ cmd += ["--", prompt]
2090
2362
  result = await self._run_cli(cmd, cwd, timeout, task_id, on_chunk=on_chunk)
2091
2363
  parsed_metrics = self._parse_agent_jsonl_output(result.stdout)
2092
2364
  result.metrics.update(parsed_metrics)
@@ -2135,6 +2407,7 @@ class ProcessManager:
2135
2407
  stdin=asyncio.subprocess.PIPE if stdin_input else None,
2136
2408
  cwd=str(cwd),
2137
2409
  limit=100 * 1024 * 1024, # 100MB line buffer for large agent output
2410
+ start_new_session=True, # own process group → killpg on timeout kills all children
2138
2411
  )
2139
2412
  self.active_processes[task_id] = proc
2140
2413
  stdin_bytes = stdin_input.encode() if stdin_input else None
@@ -2150,8 +2423,28 @@ class ProcessManager:
2150
2423
  error="" if status == "success" else f"Exited with code {returncode}",
2151
2424
  )
2152
2425
  except asyncio.TimeoutError:
2153
- if task_id in self.active_processes:
2154
- self.active_processes[task_id].kill()
2426
+ proc = self.active_processes.pop(task_id, None)
2427
+ if proc:
2428
+ try:
2429
+ if sys.platform != "win32":
2430
+ import signal as _signal
2431
+ try:
2432
+ os.killpg(os.getpgid(proc.pid), _signal.SIGKILL)
2433
+ except (ProcessLookupError, PermissionError, OSError):
2434
+ pass
2435
+ else:
2436
+ import subprocess as _subprocess
2437
+ _subprocess.run(
2438
+ ["taskkill", "/F", "/T", "/PID", str(proc.pid)],
2439
+ capture_output=True,
2440
+ )
2441
+ except Exception:
2442
+ pass
2443
+ finally:
2444
+ try:
2445
+ proc.kill()
2446
+ except Exception:
2447
+ pass
2155
2448
  return TaskResult(
2156
2449
  status="failed", exit_code=-1, stdout="", stderr="",
2157
2450
  error=f"Timed out after {timeout}s",
@@ -2556,10 +2849,28 @@ class ProcessManager:
2556
2849
  return info
2557
2850
 
2558
2851
  async def cancel(self, task_id: str):
2559
- proc = self.active_processes.get(task_id)
2852
+ proc = self.active_processes.pop(task_id, None)
2560
2853
  if proc:
2561
- proc.kill()
2562
- self.active_processes.pop(task_id, None)
2854
+ try:
2855
+ if sys.platform != "win32":
2856
+ import signal as _signal
2857
+ try:
2858
+ os.killpg(os.getpgid(proc.pid), _signal.SIGKILL)
2859
+ except (ProcessLookupError, PermissionError, OSError):
2860
+ pass
2861
+ else:
2862
+ import subprocess as _subprocess
2863
+ _subprocess.run(
2864
+ ["taskkill", "/F", "/T", "/PID", str(proc.pid)],
2865
+ capture_output=True,
2866
+ )
2867
+ except Exception:
2868
+ pass
2869
+ finally:
2870
+ try:
2871
+ proc.kill()
2872
+ except Exception:
2873
+ pass
2563
2874
 
2564
2875
 
2565
2876
  # ── Progress Reporter ──
@@ -3966,12 +4277,27 @@ class RuntimeDaemon:
3966
4277
 
3967
4278
  # Testing-specific: validate structured test assets
3968
4279
  if node_type == "testing":
3969
- # Check if this type requires full test artifacts
4280
+ # Determine which checks to run for this requirement type.
4281
+ #
4282
+ # _skip_test_artifacts = True → skip ALL artifact checks
4283
+ # (set for types that explicitly list "test_coverage" in
4284
+ # skip_dimensions, e.g. "task", "documentation", "spike")
4285
+ #
4286
+ # _requires_structured_artifacts = True → test-cases.json and
4287
+ # coverage-matrix.json are *required* deliverables.
4288
+ # Set only for "feature" and "improvement" — types whose
4289
+ # testing phase is a full QA suite rather than regression
4290
+ # verification. For "bugfix", "refactor", etc. these files
4291
+ # are *optional*: if they exist they are validated, but their
4292
+ # absence is not an error (the agent only writes regression
4293
+ # tests + test-report.md).
3970
4294
  _skip_test_artifacts = False
4295
+ _requires_structured_artifacts = False
3971
4296
  try:
3972
4297
  from app.services.type_workflow_profiles import get_profile
3973
4298
  _profile = get_profile(req_type)
3974
4299
  _skip_test_artifacts = "test_coverage" in _profile.skip_dimensions
4300
+ _requires_structured_artifacts = req_type in ("feature", "improvement")
3975
4301
  except Exception:
3976
4302
  pass
3977
4303
 
@@ -3988,6 +4314,8 @@ class RuntimeDaemon:
3988
4314
  base = workspace_path
3989
4315
 
3990
4316
  # --- test-cases.json validation ---
4317
+ # Required for feature/improvement; optional (but validated
4318
+ # if present) for all other testing node types.
3991
4319
  tc_path = base / "test-cases.json"
3992
4320
  if tc_path.exists():
3993
4321
  try:
@@ -3996,19 +4324,24 @@ class RuntimeDaemon:
3996
4324
  if not cases:
3997
4325
  issues.append("test-cases.json exists but contains no test cases")
3998
4326
  else:
4327
+ # Collect ALL malformed test cases in one pass so
4328
+ # the retry prompt can fix everything at once.
4329
+ # (Previously a `break` was used here which caused
4330
+ # a one-issue-per-retry cascade, burning through
4331
+ # max_retries before the file was fully corrected.)
3999
4332
  for tc in cases[:20]:
4000
4333
  if not tc.get("id") or not tc.get("title"):
4001
- issues.append(f"Test case missing 'id' or 'title': {tc.get('id', '?')}")
4002
- break
4003
- if not tc.get("steps"):
4334
+ issues.append(
4335
+ f"Test case missing 'id' or 'title': {tc.get('id', '?')}"
4336
+ )
4337
+ elif not tc.get("steps"):
4004
4338
  issues.append(f"Test case {tc['id']} has no 'steps'")
4005
- break
4006
4339
  p0_cases = [c for c in cases if c.get("priority") == "P0"]
4007
4340
  if not p0_cases:
4008
4341
  issues.append("No P0 priority test cases found in test-cases.json")
4009
4342
  except (_json.JSONDecodeError, UnicodeDecodeError) as e:
4010
4343
  issues.append(f"test-cases.json is not valid JSON: {e}")
4011
- else:
4344
+ elif _requires_structured_artifacts:
4012
4345
  issues.append(f"test-cases.json not found in {doc_dir or 'workspace root'}")
4013
4346
 
4014
4347
  # --- coverage-matrix.json validation ---
@@ -4023,7 +4356,7 @@ class RuntimeDaemon:
4023
4356
  issues.append(f"Uncovered acceptance criteria in coverage-matrix.json: {ids}")
4024
4357
  except (_json.JSONDecodeError, UnicodeDecodeError) as e:
4025
4358
  issues.append(f"coverage-matrix.json is not valid JSON: {e}")
4026
- else:
4359
+ elif _requires_structured_artifacts:
4027
4360
  issues.append(f"coverage-matrix.json not found in {doc_dir or 'workspace root'}")
4028
4361
 
4029
4362
  # --- test-report.md validation ---
@@ -5036,15 +5369,29 @@ class RuntimeDaemon:
5036
5369
  )
5037
5370
 
5038
5371
  logger.info("Found unpushed commits on %s, pushing...", branch)
5039
- try:
5040
- await git(
5041
- "push", "-u", "origin", branch,
5042
- cwd=workspace_path, project_key=project_key,
5043
- )
5044
- logger.info("Pushed branch %s to origin", branch)
5045
- except RuntimeError as exc:
5046
- logger.error("Push failed for branch %s: %s", branch, exc)
5047
- return f"Push failed: {exc}"
5372
+ last_push_exc: Exception | None = None
5373
+ for attempt in range(1, 4): # retry up to 3 times
5374
+ try:
5375
+ await git(
5376
+ "push", "-u", "origin", branch,
5377
+ cwd=workspace_path, project_key=project_key,
5378
+ )
5379
+ logger.info("Pushed branch %s to origin (attempt %d)", branch, attempt)
5380
+ last_push_exc = None
5381
+ break
5382
+ except RuntimeError as exc:
5383
+ last_push_exc = exc
5384
+ if attempt < 3:
5385
+ wait = attempt * 10 # 10s, 20s
5386
+ logger.warning(
5387
+ "Push attempt %d failed for branch %s: %s — retrying in %ds",
5388
+ attempt, branch, exc, wait,
5389
+ )
5390
+ await asyncio.sleep(wait)
5391
+ else:
5392
+ logger.error("Push failed for branch %s after 3 attempts: %s", branch, exc)
5393
+ if last_push_exc is not None:
5394
+ return f"Push failed: {last_push_exc}"
5048
5395
  else:
5049
5396
  logger.info("No unpushed commits on %s", branch)
5050
5397
  return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: forgexa-cli
3
- Version: 1.7.2
3
+ Version: 1.7.6
4
4
  Summary: Forgexa CLI — command-line client and AI agent runtime for the Forgexa platform
5
5
  Author-email: Jason Sun <dev.winds@gmail.com>
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "forgexa-cli"
3
- version = "1.7.2"
3
+ version = "1.7.6"
4
4
  description = "Forgexa CLI — command-line client and AI agent runtime for the Forgexa platform"
5
5
  requires-python = ">=3.9"
6
6
  license = { text = "MIT" }
File without changes
File without changes