PyPI - forgexa-cli - Versions diffs - 1.8.8__tar.gz → 1.9.0__tar.gz - Mend

forgexa-cli 1.8.8 (tar.gz) → 1.9.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

{forgexa_cli-1.8.8 → forgexa_cli-1.9.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: forgexa-cli
-Version: 1.8.8
+Version: 1.9.0
 Summary: Forgexa CLI — command-line client and AI agent runtime for the Forgexa platform
 Author-email: Jason Sun <dev.winds@gmail.com>
 License: MIT

{forgexa_cli-1.8.8 → forgexa_cli-1.9.0}/forgexa_cli/__init__.py RENAMED Viewed

@@ -1,2 +1,2 @@
 """forgexa-cli — Forgexa command-line client."""
-__version__ = "1.8.8"
+__version__ = "1.9.0"

{forgexa_cli-1.8.8 → forgexa_cli-1.9.0}/forgexa_cli/daemon.py RENAMED Viewed

@@ -352,7 +352,11 @@ except (ImportError, ModuleNotFoundError):
         @property
         def AGENT_TIMEOUT(self) -> int:
-            return int(os.environ.get("AGENT_TIMEOUT", "3600"))
+            return int(os.environ.get("AGENT_TIMEOUT", "14400"))  # 4-hour absolute ceiling
+        @property
+        def AGENT_IDLE_TIMEOUT(self) -> int:
+            return int(os.environ.get("AGENT_IDLE_TIMEOUT", "600"))  # 10-min idle (stdout+fs) = hung agent
         @property
         def GIT_CLONE_TIMEOUT(self) -> int:
@@ -392,7 +396,7 @@ except (ImportError, ModuleNotFoundError):
 # DAEMON_VERSION is the protocol/logic version of the daemon code.
 # Kept in sync with pyproject.toml version via bump-version.sh.
 # CLIENT_TYPE identifies which packaging/distribution this daemon runs in.
-DAEMON_VERSION = "1.8.8"
+DAEMON_VERSION = "1.9.0"
 def _detect_client_type() -> str:
@@ -633,6 +637,11 @@ class TaskResult:
     lines_added: int = 0
     lines_removed: int = 0
     error: str = ""
+    # failure_code is forwarded to the server to drive retry policy.
+    # Key values:
+    #   "all_agents_rate_limited" — daemon tried every installed agent, all
+    #       hit rate/quota limits.  Server must NOT retry on the same runtime.
+    failure_code: str = ""
     artifacts: list[dict] = field(default_factory=list)
     observations: list[dict] = field(default_factory=list)
     metrics: dict = field(default_factory=dict)
@@ -1815,6 +1824,13 @@ class WorkspaceManager:
                 if git_prefix_args:
                     env = {**(env or os.environ), "GIT_TERMINAL_PROMPT": "0"}
+        # Always enable long-path support.  On Windows this removes git's own
+        # 260-char path limit (Windows also needs HKLM LongPathsEnabled=1 or
+        # the Win10 1607+ Group Policy, but at a minimum we ensure git won't
+        # reject long paths on platforms where it is already enabled).
+        # On Linux/macOS this is a no-op.
+        longpath_args = ["-c", "core.longpaths=true"]
         # start_new_session=True puts git in its own process group.
         # On timeout we send SIGKILL to the entire group, which includes
         # any ssh/gpg/credential-helper children that git forked — preventing
@@ -1822,7 +1838,7 @@ class WorkspaceManager:
         # Windows note: start_new_session creates a new console process group;
         # we use taskkill /T there instead of killpg.
         proc = await asyncio.create_subprocess_exec(
-            "git", *git_prefix_args, *args,
+            "git", *longpath_args, *git_prefix_args, *args,
             stdout=asyncio.subprocess.PIPE,
             stderr=asyncio.subprocess.PIPE,
             cwd=str(cwd) if cwd else None,
@@ -2134,10 +2150,54 @@ class ProcessManager:
             "has_turn_failed": has_turn_failed,
             "has_result": has_result,
             "has_meaningful_content": has_meaningful_content,
+            "has_assistant_events": has_assistant_events,
             "error_messages": error_messages,
             "json_line_count": json_line_count,
         }
+    @staticmethod
+    def _should_scan_short_success_stdout(stdout: str, signals: dict[str, Any]) -> bool:
+        """Return True when success-shaped stdout is short enough to be an error blob.
+        Real agent work output can legitimately mention quota/rate-limit terms, so
+        we never scan arbitrary stdout for success cases. The safe exception is a
+        tiny stdout payload with no assistant/result/turn-complete signals; in that
+        shape the CLI usually failed before making a real model call and printed a
+        plain-text transport/quota error like "API Error: 429 ...".
+        """
+        stdout_stripped = (stdout or "").strip()
+        if not stdout_stripped or len(stdout_stripped) >= 500:
+            return False
+        return not (
+            signals.get("has_result")
+            or signals.get("has_turn_completed")
+            or signals.get("has_assistant_events")
+        )
+    @staticmethod
+    def _failure_pattern_channels(result: "TaskResult") -> str:
+        """Build the text window safe to scan for quota/backend failure patterns."""
+        stdout = result.stdout or ""
+        stderr = result.stderr or ""
+        error = result.error or ""
+        if result.status != "success":
+            if result.exit_code == 0:
+                return "\n".join(part for part in (stderr, error) if part)
+            return "\n".join(part for part in (stderr, error, stdout[-3000:]) if part)
+        error_channels = "\n".join(part for part in (stderr, error) if part)
+        has_token_usage = (
+            int(result.metrics.get("token_input", 0) or 0)
+            + int(result.metrics.get("token_output", 0) or 0)
+        ) > 0
+        signals = ProcessManager._extract_output_signals(
+            "\n".join(part for part in (stdout, stderr) if part)
+        )
+        if not has_token_usage and ProcessManager._should_scan_short_success_stdout(stdout, signals):
+            error_channels = "\n".join(filter(None, [error_channels, stdout.strip()]))
+        return error_channels
     @staticmethod
     def has_meaningful_agent_output(result: "TaskResult") -> bool:
         """Return True when the agent emitted real user-meaningful output."""
@@ -2156,29 +2216,12 @@ class ProcessManager:
         Returns True for rate/quota limits AND API unavailability errors,
         since a different agent (using a different API backend) may succeed.
-        IMPORTANT: Only checks stderr and error message.  When exit code is
-        non-zero, also checks the tail of stdout (last 3000 chars) since the
-        error is likely at the end.  When exit code is 0 (agent reported
-        success but _detect_agent_output_failure set status to failed), do
-        NOT scan stdout — it contains the agent's work output (configs, code)
-        which naturally has terms like "rate_limit", "API_RATE_LIMIT_PER_MINUTE"
-        that trigger false positives.
+        For true success cases we still avoid scanning arbitrary stdout.
+        The one safe exception is a tiny stdout payload with no assistant/result
+        signals, which strongly indicates a pre-call CLI failure printed as
+        plain text (for example "API Error: 429 ...").
         """
-        if result.status == "success":
-            return False
-        # When exit code is 0, _detect_agent_output_failure already checked
-        # stderr+error for rate-limit patterns.  Don't re-scan stdout here.
-        if result.exit_code == 0:
-            error_text = (
-                (result.stderr or "")
-                + "\n" + (result.error or "")
-            ).lower()
-        else:
-            error_text = (
-                (result.stderr or "")
-                + "\n" + (result.error or "")
-                + "\n" + (result.stdout or "")[-3000:]
-            ).lower()
+        error_text = ProcessManager._failure_pattern_channels(result).lower()
         return (
             any(p in error_text for p in ProcessManager.RATE_LIMIT_PATTERNS)
             or any(p in error_text for p in ProcessManager.AGENT_UNAVAILABLE_PATTERNS)
@@ -2198,16 +2241,13 @@ class ProcessManager:
         if result.status != "success":
             return None
-        # For exit-code-0 (success) cases, only scan stderr and the error field
-        # for rate-limit / unavailability patterns.  Stdout contains the agent's
-        # actual task output (code, configs, analysis docs) which may legitimately
-        # contain substrings like "rate_limit", "429", "quota", etc. — e.g. writing
-        # a config file with API_RATE_LIMIT_PER_MINUTE=1000 would previously trigger
-        # a false "quota exhaustion" failure even though the agent succeeded.
-        # stdout[-N:] is only safe to scan when the agent already failed (exit != 0),
-        # which is handled by is_rate_limited() called at the orchestrator level.
-        error_only_channels = (result.stderr or "") + "\n" + (result.error or "")
-        pattern_failure = ProcessManager._has_failure_pattern(error_only_channels)
+        # For exit-code-0 (success) cases, avoid scanning arbitrary stdout for
+        # quota keywords. The only safe stdout exception is a tiny payload with
+        # no success signals, which typically means the CLI failed before making
+        # a real model call and printed a plain-text error like "API Error: 429".
+        pattern_failure = ProcessManager._has_failure_pattern(
+            ProcessManager._failure_pattern_channels(result)
+        )
         if pattern_failure:
             return pattern_failure
@@ -2223,8 +2263,13 @@ class ProcessManager:
         has_turn_failed = signals["has_turn_failed"]
         has_result = signals["has_result"]
         has_meaningful_content = signals["has_meaningful_content"]
+        has_assistant_events = signals["has_assistant_events"]
         error_messages = signals["error_messages"]
         json_line_count = signals["json_line_count"]
+        has_token_usage = (
+            int(result.metrics.get("token_input", 0) or 0)
+            + int(result.metrics.get("token_output", 0) or 0)
+        ) > 0
         stderr_lower = stderr.lower()
         if (
@@ -2256,14 +2301,27 @@ class ProcessManager:
                 and not has_meaningful_content and json_line_count > 0):
             return f"Agent encountered errors without producing output: {error_messages[0]}"
+        # Plain-text pre-call failures (not JSONL) can still exit 0 on some
+        # agent CLIs. When stdout is tiny and lacks any structural success
+        # signals, treat explicit error markers as agent failure so fallback can
+        # run locally instead of relying on server-side re-enqueue.
+        stdout_stripped = stdout.strip()
+        stdout_lower = stdout_stripped.lower()
+        if (
+            not has_token_usage
+            and ProcessManager._should_scan_short_success_stdout(stdout, signals)
+            and any(marker in stdout_lower for marker in ("api error", "exception", "forbidden", "unauthorized"))
+        ):
+            return stdout_stripped.splitlines()[-1][:300]
         # ── Claude: JSON output mode but no result object and no content ──
         if agent_id == "claude" and json_line_count > 0:
-            if not has_result and not has_meaningful_content:
+            if not has_result and not has_meaningful_content and not has_assistant_events:
                 return "Claude produced no result output"
         # ── Copilot: JSONL mode but no turn completion and no content ──
         if agent_id == "copilot" and json_line_count > 0:
-            if not has_result and not has_meaningful_content:
+            if not has_result and not has_meaningful_content and not has_assistant_events:
                 return "Copilot produced no result output (check GitHub authentication: run 'gh auth login')"
         return None
@@ -2331,8 +2389,9 @@ class ProcessManager:
         return normalized
     def _required_deliverable_paths(self, task: TaskInfo) -> set[str]:
-        # For analysis nodes, deliverables live in analysis_output_dir (docs/requirements/...)
-        # For other nodes, use output_dir (docs/implements/...)
+        # For analysis nodes, deliverables live in analysis_output_dir (docs/requirements/<key>/analysis)
+        # For delivery nodes, deliverables live in output_dir (docs/requirements/<key>/delivery)
+        # For other nodes, use output_dir (docs/requirements/<key>/implement)
         if task.node_type == "analysis":
             output_dir = str(
                 (task.input_data or {}).get("analysis_output_dir", "")
@@ -2350,6 +2409,9 @@ class ProcessManager:
             required_files = _get_analysis_outputs_for_type(req_type)
         elif task.node_type == "design":
             required_files = ["design.md"]
+        elif task.node_type == "delivery":
+            # Required docs come from node input_data (set by delivery_doc_service)
+            required_files = (task.input_data or {}).get("required_docs") or ["release-note.md"]
         else:
             return set()
@@ -3503,6 +3565,7 @@ class ProgressReporter:
             "stdout_tail": result.stdout[-20000:] if result.stdout else "",
             "stderr_tail": result.stderr[-5000:] if result.stderr else "",
             "error": result.error,
+            "failure_code": result.failure_code,
             "files_changed": result.files_changed,
             "lines_added": result.lines_added,
             "lines_removed": result.lines_removed,
@@ -4402,7 +4465,52 @@ class RuntimeDaemon:
             )
             logger.info("Workspace ready: %s", workspace_path)
-            # 2.5 Wipe the analysis output directory on fresh analysis so the new
+            # 2.1 Workspace health check: detect broken checkout (Windows filename-
+            # too-long or other git checkout failure that leaves the working tree
+            # empty while the git index still tracks all source files).
+            # If this is not caught the agent will run `git add -A` and commit a
+            # catastrophic mass-deletion (e.g. SI-434: 47,566 files deleted).
+            try:
+                _index_count_out = await self._git(
+                    "ls-files", "--cached", "--", ".", cwd=workspace_path,
+                    timeout=30,
+                )
+                _index_count = len([l for l in _index_count_out.splitlines() if l.strip()])
+                if _index_count > 500:
+                    # Count physical files (exclude .git/)
+                    _phys_count = sum(1 for _ in workspace_path.rglob("*")
+                                      if _.is_file() and ".git" not in _.parts)
+                    _ratio = _phys_count / _index_count
+                    if _ratio < 0.20:
+                        # Less than 20 % of tracked files exist on disk — almost
+                        # certainly a failed git checkout (e.g. Windows path-length
+                        # limit).  Abort rather than letting the agent commit a
+                        # mass-deletion.
+                        _longpath_hint = (
+                            " Enable Windows long-path support: run "
+                            "`git config --global core.longpaths true` and enable "
+                            "LongPathsEnabled in Windows Group Policy / Registry "
+                            "(HKLM\\SYSTEM\\CurrentControlSet\\Control\\FileSystem\\LongPathsEnabled=1)."
+                            if sys.platform == "win32" else ""
+                        )
+                        raise RuntimeError(
+                            f"Workspace health check failed: only {_phys_count}/{_index_count} "
+                            f"tracked files exist on disk ({_ratio:.0%}).  "
+                            f"The git checkout likely failed due to filename-length limitations."
+                            f"{_longpath_hint}"
+                        )
+                    elif _ratio < 0.80:
+                        logger.warning(
+                            "Workspace health check warning: only %d/%d tracked files "
+                            "exist on disk (%.0f%%) for task %s — checkout may be incomplete.",
+                            _phys_count, _index_count, _ratio * 100, task.task_id,
+                        )
+            except RuntimeError:
+                raise
+            except Exception as _health_exc:
+                logger.warning("Workspace health check error (non-fatal): %s", _health_exc)
             # agent run starts from a completely clean slate.  This covers:
             #   • Type change: removes old-type files (e.g. PRD.md/SDD.md) so they
             #     don't coexist with the new type's files (e.g. diagnosis.md).
@@ -4630,6 +4738,10 @@ class RuntimeDaemon:
                         f"Original error: {result.error}"
                     )
                     result.status = "failed"
+                    # Signal to the server that ALL installed agents were tried and
+                    # all are rate/quota limited.  The server must NOT re-enqueue on
+                    # the same runtime — that would hit the same quota wall.
+                    result.failure_code = "all_agents_rate_limited"
             # 4. Collect git info BEFORE commit (shows uncommitted changes)
             pre_commit_git = await self.process_manager._collect_git_info(workspace_path)
@@ -4715,6 +4827,72 @@ class RuntimeDaemon:
                 except Exception:
                     logger.exception("Validation gate error for task %s (proceeding anyway)", task.task_id)
+            # 4.6 Post-validation rate-limit fallback.
+            # _validate_and_retry returns early (preserving the rate-limit error)
+            # when the agent hits a quota wall mid-retry.  The initial-run fallback
+            # block (step 3) only checked the *initial* run; if that succeeded but
+            # the agent became rate-limited during a validation retry, we need a
+            # second fallback pass here so the task is attempted on a fresh agent.
+            if result.status == "failed" and self.process_manager.is_rate_limited(result) and not _skip_fallback:
+                logger.warning(
+                    "Agent '%s' rate-limited during validation retry for task %s — "
+                    "attempting post-validation agent fallback",
+                    agent.agent_id, task.task_id,
+                )
+                _pv_fallback = self._select_fallback_agent(
+                    agent.agent_id, task.fallback_chain, tried_agents
+                )
+                while _pv_fallback:
+                    logger.info(
+                        "Post-validation fallback: '%s' → '%s' for task %s",
+                        agent.agent_id, _pv_fallback.agent_id, task.task_id,
+                    )
+                    agent = _pv_fallback
+                    tried_agents.add(agent.agent_id)
+                    await reporter.report_progress(
+                        task.task_id, 10,
+                        f"agent_fallback: retrying with {agent.agent_id}",
+                        output_lines=[
+                            f"[daemon] Agent rate-limited during validation, "
+                            f"switching to {agent.agent_id}",
+                        ],
+                    )
+                    result = await self.process_manager.run_agent(
+                        agent, task, workspace_path, on_chunk=on_output_chunk,
+                    )
+                    if not self.process_manager.is_rate_limited(result):
+                        # Fallback agent ran successfully (or hit a non-rate-limit
+                        # failure) — re-run the validation gate and update git state.
+                        if result.status == "success":
+                            try:
+                                result = await self._validate_and_retry(
+                                    agent, task, workspace_path, result,
+                                    reporter, on_output_chunk, max_retries=2,
+                                )
+                                pre_commit_git = await self.process_manager._collect_git_info(workspace_path)
+                            except Exception:
+                                logger.exception(
+                                    "Post-validation gate error for task %s (proceeding anyway)",
+                                    task.task_id,
+                                )
+                        break
+                    logger.warning(
+                        "Post-validation fallback agent '%s' also rate-limited for task %s",
+                        agent.agent_id, task.task_id,
+                    )
+                    _pv_fallback = self._select_fallback_agent(
+                        agent.agent_id, task.fallback_chain, tried_agents
+                    )
+                # If every agent we tried is still rate-limited, signal the server
+                # NOT to re-enqueue — it would hit the same quota wall immediately.
+                if self.process_manager.is_rate_limited(result):
+                    result.error = (
+                        f"All agents unavailable/rate-limited (tried: {', '.join(tried_agents)}). "
+                        f"Original error: {result.error}"
+                    )
+                    result.status = "failed"
+                    result.failure_code = "all_agents_rate_limited"
             # 4.55 Analysis/design nodes must update their deliverables in THIS run.
             # Existing files from a prior iteration are not sufficient evidence.
             if result.status == "success" and task.node_type in ("analysis", "design"):
@@ -5196,6 +5374,38 @@ class RuntimeDaemon:
             # Flush any remaining buffered lines after agent finishes
             await _flush_output_to_server()
+            # 3.5 Agent fallback: if the chosen agent hit a rate/quota limit,
+            # try the next available agent before giving up.
+            _aj_tried: set[str] = {agent.agent_id}
+            while self.process_manager.is_rate_limited(result):
+                _aj_fallback = self._select_fallback_agent(agent.agent_id, [], _aj_tried)
+                if not _aj_fallback:
+                    # All agents exhausted — signal server not to re-enqueue.
+                    result.failure_code = "all_agents_rate_limited"
+                    break
+                logger.warning(
+                    "AIJob %s: agent '%s' rate-limited, falling back to '%s'",
+                    job_id, agent.agent_id, _aj_fallback.agent_id,
+                )
+                await conn.client.post(
+                    f"{reporter_url}/progress",
+                    json={
+                        "current_step": f"agent_fallback: retrying with {_aj_fallback.agent_id}",
+                        "output_lines": [
+                            f"[daemon] Agent rate-limited, switching to {_aj_fallback.agent_id}",
+                        ],
+                        "progress_pct": 15,
+                    },
+                    timeout=5,
+                )
+                agent = _aj_fallback
+                _aj_tried.add(agent.agent_id)
+                fake_task.agent_type = agent.agent_id
+                result = await self.process_manager.run_agent(
+                    agent, fake_task, workspace_path, on_chunk=on_chunk,
+                )
+                await _flush_output_to_server()
             # 4. Auto-commit if successful
             input_ctx = aj.get("input_context", {})
             git_info = {}
@@ -5242,6 +5452,10 @@ class RuntimeDaemon:
                             except Exception:
                                 pass
+            # Preserve all_agents_rate_limited so the server does NOT re-enqueue.
+            _failure_code = result.failure_code if result.failure_code else (
+                "agent_error" if result.status != "success" else ""
+            )
             complete_payload = {
                 "status": "success" if result.status == "success" else "failed",
                 "output_content": output_content,
@@ -5255,7 +5469,7 @@ class RuntimeDaemon:
                 "resolved_agent": agent.agent_id,
                 "git_info": git_info,
                 "error": result.error if result.status != "success" else "",
-                "failure_code": "agent_error" if result.status != "success" else "",
+                "failure_code": _failure_code,
             }
             await conn.client.post(
@@ -5315,7 +5529,14 @@ class RuntimeDaemon:
                 ],
             )
-            # Build a targeted fix prompt with output directory context
+            # Save the original prompt BEFORE building the retry variant so we
+            # can include it in fix_prompt.  Without this the agent receives only
+            # "fix validation errors" with zero task context and responds with
+            # "I don't have a specific task to execute yet." (root cause confirmed
+            # via Copilot JSONL output for SI-434/SI-446).
+            original_prompt = task.input_prompt
+            # Build a targeted fix prompt: original task + validation issues.
             _input = task.input_data or {}
             _fix_doc_dir = (
                 _input.get("output_dir")
@@ -5323,8 +5544,11 @@ class RuntimeDaemon:
                 or ""
             )
             fix_prompt = (
-                "The previous execution produced output with validation errors.\n"
-                "Please fix ALL of the following issues:\n\n"
+                f"{original_prompt}\n\n"
+                "---\n\n"
+                "**IMPORTANT – Validation Retry:** The previous execution attempt "
+                "did not produce all required output. Please complete the task above "
+                "and ensure ALL of the following issues are resolved:\n\n"
                 f"{issues_text}\n\n"
             )
             if _fix_doc_dir:
@@ -5339,7 +5563,6 @@ class RuntimeDaemon:
             )
             # Override task prompt temporarily
-            original_prompt = task.input_prompt
             task.input_prompt = fix_prompt
             try:
@@ -5349,6 +5572,20 @@ class RuntimeDaemon:
             finally:
                 task.input_prompt = original_prompt
+            # If the agent hit a rate/quota limit during this validation retry,
+            # bail out immediately so the outer execution loop can trigger agent
+            # fallback.  Continuing to retry with the same rate-limited agent is
+            # pointless; it will hit the same wall every time.
+            # Returning early also preserves the rate-limit error in result.error
+            # so that is_rate_limited() can detect it in the caller.
+            if ProcessManager.is_rate_limited(result):
+                logger.warning(
+                    "Agent '%s' rate-limited during validation retry for task %s "
+                    "(attempt %d/%d) — aborting validation retries for agent fallback",
+                    agent.agent_id, task.task_id, attempt + 1, max_retries,
+                )
+                return result
         # Final check after all retries
         remaining = self._validate_outputs(workspace_path, task, result)
         if remaining:
@@ -5822,7 +6059,7 @@ class RuntimeDaemon:
         for f in files[:30]:
             path = f["path"].lower()
             fname = path.rsplit("/", 1)[-1]
-            if "docs/requirements" in path:
+            if "docs/requirements" in path or "docs/workitems" in path:
                 buckets["Analysis deliverables"].append(f)
             elif (
                 "_test" in fname or fname.startswith("test_")
@@ -6032,24 +6269,49 @@ class RuntimeDaemon:
         # 5. Run the agent with the conflict resolution prompt
         logger.info("Invoking %s to resolve %d conflict(s)...", agent.agent_id, len(conflicted_files))
         try:
+            _cr_task = TaskInfo(
+                task_id=f"{task.task_id}-conflict-resolve",
+                graph_id=task.graph_id,
+                node_type="conflict_resolution",
+                agent_type=agent.agent_id,
+                input_prompt=resolve_prompt,
+                input_data={},
+                timeout_seconds=min(task.timeout_seconds, 300),  # cap at 5 min
+                max_retries=0,
+                retry_count=0,
+                project=task.project,
+                work_item=task.work_item,
+            )
             resolve_result = await self.process_manager.run_agent(
                 agent,
-                TaskInfo(
-                    task_id=f"{task.task_id}-conflict-resolve",
-                    graph_id=task.graph_id,
-                    node_type="conflict_resolution",
-                    agent_type=agent.agent_id,
-                    input_prompt=resolve_prompt,
-                    input_data={},
-                    timeout_seconds=min(task.timeout_seconds, 300),  # cap at 5 min
-                    max_retries=0,
-                    retry_count=0,
-                    project=task.project,
-                    work_item=task.work_item,
-                ),
+                _cr_task,
                 workspace_path,
             )
+            # Agent fallback: if the primary agent is rate-limited, try others.
+            _cr_tried: set[str] = {agent.agent_id}
+            while self.process_manager.is_rate_limited(resolve_result):
+                _cr_fallback = self._select_fallback_agent(agent.agent_id, task.fallback_chain, _cr_tried)
+                if not _cr_fallback:
+                    logger.warning(
+                        "All agents rate-limited for conflict resolution of task %s — aborting merge",
+                        task.task_id,
+                    )
+                    try:
+                        await git("merge", "--abort", cwd=workspace_path)
+                    except RuntimeError:
+                        pass
+                    return
+                logger.warning(
+                    "Conflict resolution: agent '%s' rate-limited for task %s, "
+                    "falling back to '%s'",
+                    agent.agent_id, task.task_id, _cr_fallback.agent_id,
+                )
+                agent = _cr_fallback
+                _cr_tried.add(agent.agent_id)
+                _cr_task.agent_type = agent.agent_id
+                resolve_result = await self.process_manager.run_agent(agent, _cr_task, workspace_path)
             # 6. Check if conflicts are resolved
             proc = await asyncio.create_subprocess_exec(
                 "git", "diff", "--name-only", "--diff-filter=U",

{forgexa_cli-1.8.8 → forgexa_cli-1.9.0}/forgexa_cli.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: forgexa-cli
-Version: 1.8.8
+Version: 1.9.0
 Summary: Forgexa CLI — command-line client and AI agent runtime for the Forgexa platform
 Author-email: Jason Sun <dev.winds@gmail.com>
 License: MIT

{forgexa_cli-1.8.8 → forgexa_cli-1.9.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "forgexa-cli"
-version = "1.8.8"
+version = "1.9.0"
 description = "Forgexa CLI — command-line client and AI agent runtime for the Forgexa platform"
 requires-python = ">=3.9"
 license = { text = "MIT" }