PyPI - forgexa-cli - Versions diffs - 1.3.2__tar.gz → 1.4.2__tar.gz - Mend

forgexa-cli 1.3.2 (tar.gz) → 1.4.2 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

{forgexa_cli-1.3.2 → forgexa_cli-1.4.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: forgexa-cli
-Version: 1.3.2
+Version: 1.4.2
 Summary: Forgexa CLI — command-line client and AI agent runtime for the Forgexa platform
 Author-email: Jason Sun <dev.winds@gmail.com>
 License: MIT

{forgexa_cli-1.3.2 → forgexa_cli-1.4.2}/forgexa_cli/__init__.py RENAMED Viewed

@@ -1,2 +1,2 @@
 """forgexa-cli — Forgexa command-line client."""
-__version__ = "1.3.2"
+__version__ = "1.4.2"

{forgexa_cli-1.3.2 → forgexa_cli-1.4.2}/forgexa_cli/daemon.py RENAMED Viewed

@@ -37,16 +37,198 @@ try:
 except ImportError:
     fcntl = None  # type: ignore[assignment]
-try:
-    import httpx
-except ImportError:
-    # Auto-install httpx when running standalone (e.g., bundled with desktop app)
-    subprocess.check_call(
-        [sys.executable, "-m", "pip", "install", "--quiet", "httpx>=0.24"],
-        stdout=subprocess.DEVNULL,
-        stderr=subprocess.PIPE,
+# ── httpx dependency — robust auto-install for standalone environments ──
+# When running inside the backend package, httpx is a declared dependency and
+# always available.  In standalone contexts (desktop AppImage/DMG/MSI, CLI
+# without [daemon] extra), httpx may be missing.  We try multiple strategies:
+#
+#   1. Direct import (works for backend & CLI[daemon])
+#   2. Import from cached deps dir (~/.forgexa/daemon/deps)
+#   3. Auto-install via pip --target to the cached deps dir
+#      (bypasses PEP 668 / externally-managed-environment on modern distros)
+#   4. Friendly error with OS-specific instructions if all else fails
+_HTTPX_DEPS_DIR = os.path.join(str(Path.home()), ".forgexa", "daemon", "deps")
+def _try_install_httpx(deps_dir: str) -> tuple[bool, str]:
+    """Try to install httpx to a user-writable directory.
+    Uses pip --target which works on:
+    - AppImage (read-only squashfs, system Python)
+    - PEP 668 systems (Ubuntu 23.04+, Fedora 38+) — bypasses externally-managed check
+    - macOS .app bundles (sandboxed Python)
+    - Windows portable installs
+    - Docker containers with read-only system dirs
+    Returns (success, error_detail).
+    """
+    os.makedirs(deps_dir, exist_ok=True)
+    python = sys.executable or "python3"
+    # Try pip --target first (most universally compatible).
+    # Falls back to --user, then --break-system-packages as last resort.
+    # We explicitly list httpcore alongside httpx because pip --target may
+    # skip transitive deps it finds in system site-packages, even though
+    # they won't be importable from the isolated deps directory.
+    strategies: list[tuple[str, list[str]]] = [
+        (
+            "pip install --target (isolated deps)",
+            [python, "-m", "pip", "install", "--target", deps_dir,
+             "--quiet", "--upgrade", "httpx>=0.24", "httpcore"],
+        ),
+        (
+            "pip install --user",
+            [python, "-m", "pip", "install", "--user", "--quiet",
+             "httpx>=0.24", "httpcore"],
+        ),
+        (
+            "pip install --break-system-packages",
+            [python, "-m", "pip", "install", "--quiet",
+             "--break-system-packages", "httpx>=0.24", "httpcore"],
+        ),
+    ]
+    last_error = ""
+    for label, cmd in strategies:
+        try:
+            result = subprocess.run(
+                cmd,
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.PIPE,
+                text=True,
+                timeout=120,
+            )
+            if result.returncode == 0:
+                return True, ""
+            last_error = f"[{label}] exit code {result.returncode}"
+            stderr_text = (result.stderr or "").strip()
+            if stderr_text:
+                # Keep last 5 lines of stderr for diagnostics
+                stderr_lines = stderr_text.splitlines()[-5:]
+                last_error += ": " + " | ".join(stderr_lines)
+        except FileNotFoundError:
+            last_error = f"[{label}] Python not found: {cmd[0]}"
+        except subprocess.TimeoutExpired:
+            last_error = f"[{label}] timed out after 120s"
+        except Exception as exc:
+            last_error = f"[{label}] {type(exc).__name__}: {exc}"
+    return False, last_error
+def _die_missing_httpx(detail: str) -> None:
+    """Print a clear, actionable error and exit when httpx cannot be loaded."""
+    os_name = platform.system()
+    python_path = sys.executable or "(unknown)"
+    if os_name == "Linux":
+        hints = [
+            "pip3 install --user httpx",
+            "sudo apt install python3-httpx          # Debian/Ubuntu",
+            "sudo dnf install python3-httpx          # Fedora/RHEL",
+            "pip3 install forgexa-cli[daemon]",
+        ]
+    elif os_name == "Darwin":
+        hints = [
+            "pip3 install httpx",
+            "brew install python3 && pip3 install httpx",
+            "pip3 install forgexa-cli[daemon]",
+        ]
+    elif os_name == "Windows":
+        hints = [
+            "pip install httpx",
+            "pip install forgexa-cli[daemon]",
+        ]
+    else:
+        hints = [
+            "pip3 install httpx",
+            "pip3 install forgexa-cli[daemon]",
+        ]
+    hint_lines = "\n".join(f"    {h}" for h in hints)
+    msg = (
+        "\n"
+        "┌─────────────────────────────────────────────────────────────────────┐\n"
+        "│  Forgexa Daemon: missing required dependency 'httpx'               │\n"
+        "└─────────────────────────────────────────────────────────────────────┘\n"
+        "\n"
+        "  The daemon requires the 'httpx' HTTP client library but it could\n"
+        "  not be imported, and automatic installation failed.\n"
+        "\n"
+        f"  Python:    {python_path}\n"
+        f"  Platform:  {os_name} ({platform.machine()})\n"
+        f"  Detail:    {detail}\n"
+        "\n"
+        "  Please install it manually with one of these commands:\n"
+        "\n"
+        f"{hint_lines}\n"
+        "\n"
+        "  Then restart the daemon.\n"
+        "─────────────────────────────────────────────────────────────────────\n"
     )
-    import httpx
+    print(msg, file=sys.stderr)
+    # Machine-readable summary for the desktop app to parse and show as a toast.
+    print(f"DAEMON_ERROR: Missing required Python package 'httpx'. {detail}", file=sys.stderr)
+    sys.exit(1)
+def _validate_httpx_imports() -> tuple[bool, str]:
+    """Validate that httpx and its critical transitive deps are importable.
+    A bare ``import httpx`` can succeed even when httpcore is missing,
+    because httpx lazily imports its transport layer.  We eagerly check
+    the full chain so the daemon fails fast with a clear message instead
+    of crashing mid-operation when ``httpx.AsyncClient()`` tries to load
+    the transport.
+    Returns (ok, missing_module_name).
+    """
+    for mod_name in ("httpx", "httpcore"):
+        try:
+            __import__(mod_name)
+        except ImportError:
+            return False, mod_name
+    return True, ""
+# Actual import sequence
+_httpx_ok, _httpx_missing = _validate_httpx_imports()
+if not _httpx_ok:
+    # Check cached deps directory (previous auto-install)
+    if _HTTPX_DEPS_DIR not in sys.path:
+        sys.path.insert(0, _HTTPX_DEPS_DIR)
+    _httpx_ok, _httpx_missing = _validate_httpx_imports()
+if not _httpx_ok:
+    # If httpx is present but a sub-dependency (httpcore) is missing,
+    # the deps directory has a partial/stale installation.  Clear it and
+    # purge cached modules so pip does a clean install with all transitive
+    # dependencies.
+    if _httpx_missing != "httpx":
+        shutil.rmtree(_HTTPX_DEPS_DIR, ignore_errors=True)
+        for _mod_key in list(sys.modules):
+            if _mod_key in ("httpx", "httpcore") or \
+               _mod_key.startswith(("httpx.", "httpcore.")):
+                del sys.modules[_mod_key]
+    # Attempt auto-install to user-writable deps directory
+    _ok, _err = _try_install_httpx(_HTTPX_DEPS_DIR)
+    if _ok:
+        if _HTTPX_DEPS_DIR not in sys.path:
+            sys.path.insert(0, _HTTPX_DEPS_DIR)
+        _httpx_ok, _httpx_missing = _validate_httpx_imports()
+        if not _httpx_ok:
+            _die_missing_httpx(
+                f"pip install succeeded but '{_httpx_missing}' still cannot "
+                "be imported — check Python version compatibility"
+            )
+    else:
+        _die_missing_httpx(_err)
+import httpx  # noqa: E402  — guaranteed available after validation above
+del _httpx_ok, _httpx_missing
 # ── Settings: graceful fallback when running standalone (outside backend package) ──
 try:
@@ -117,6 +299,36 @@ except (ImportError, ModuleNotFoundError):
     settings = _StandaloneSettings()  # type: ignore[assignment]
+# ── Daemon version and client type ────────────────────────────────────────
+# DAEMON_VERSION is the protocol/logic version of the daemon code.
+# Kept in sync with pyproject.toml version via bump-version.sh.
+# CLIENT_TYPE identifies which packaging/distribution this daemon runs in.
+DAEMON_VERSION = "1.4.2"
+def _detect_client_type() -> str:
+    """Auto-detect client type from runtime context.
+    Priority:
+      1. FORGEXA_CLIENT_TYPE env var (set by desktop Tauri launcher)
+      2. Import context: app.config importable → "server"
+      3. Default: "cli" (standalone pip-installed daemon)
+    This allows a single daemon.py source to work correctly regardless
+    of deployment context, making the bundle-daemon.sh copy safe.
+    """
+    env_type = os.environ.get("FORGEXA_CLIENT_TYPE", "").strip().lower()
+    if env_type in ("server", "cli", "desktop"):
+        return env_type
+    # Server: app.config was successfully imported at module level above
+    if "app.config" in sys.modules:
+        return "server"
+    # Default: standalone execution = CLI
+    return "cli"
+_CLIENT_TYPE = _detect_client_type()
 # ── Logging — self-managed file handler ────────────────────────────────
 # The daemon configures its own FileHandler so logs are written to
 # ~/.forgexa/daemon/daemon.log regardless of how the daemon was launched
@@ -331,6 +543,31 @@ class TaskResult:
     git: dict = field(default_factory=dict)
+# ── Type-aware analysis outputs (inline fallback for standalone daemons) ──
+# Mirrors type_workflow_profiles.py — used when import is unavailable (CLI/Desktop).
+_ANALYSIS_OUTPUTS_BY_TYPE: dict[str, list[str]] = {
+    "feature": ["PRD.md", "SDD.md", "TASKS.md", "analysis.json", "test-intent.json"],
+    "bugfix": ["diagnosis.md", "TASKS.md", "analysis.json", "test-intent.json"],
+    "refactor": ["refactor-plan.md", "TASKS.md", "analysis.json"],
+    "documentation": ["outline.md", "analysis.json"],
+    "improvement": ["improvement-spec.md", "TASKS.md", "analysis.json", "test-intent.json"],
+    "task": ["task-plan.md", "analysis.json"],
+}
+def _get_analysis_outputs_for_type(req_type: str) -> list[str]:
+    """Get expected analysis output files for a requirement type.
+    Tries to use type_workflow_profiles (available in backend context),
+    falls back to inline mapping for standalone daemon execution.
+    """
+    try:
+        from app.services.type_workflow_profiles import get_profile
+        return list(get_profile(req_type).analysis_outputs)
+    except Exception:
+        return _ANALYSIS_OUTPUTS_BY_TYPE.get(req_type, _ANALYSIS_OUTPUTS_BY_TYPE["feature"])
 # ── Agent Discovery ──
@@ -993,9 +1230,19 @@ class WorkspaceManager:
                         os.write(fd, b"\n")
                     os.close(fd)
                     os.chmod(key_path, stat_mod.S_IRUSR)
+                    # On Windows, convert backslashes to forward slashes and quote
+                    # the path. Git invokes GIT_SSH_COMMAND via MSYS2 shell which
+                    # interprets backslashes as escape sequences, corrupting the
+                    # path (e.g. C:\Users → C:Users).
+                    key_path_safe = key_path.replace("\\", "/") if sys.platform == "win32" else key_path
                     env = {
                         **os.environ,
-                        "GIT_SSH_COMMAND": f"ssh -i {key_path} -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null",
+                        "GIT_SSH_COMMAND": (
+                            f'ssh -i "{key_path_safe}"'
+                            f" -o StrictHostKeyChecking=accept-new"
+                            f" -o UserKnownHostsFile=/dev/null"
+                            f" -o IdentitiesOnly=yes"
+                        ),
                     }
                 except Exception:
                     try:
@@ -1031,10 +1278,12 @@ class WorkspaceManager:
             # Clean up temp SSH key file if created
             if env and "GIT_SSH_COMMAND" in env:
                 import re as _re
-                m = _re.search(r"-i\s+(\S+)", env["GIT_SSH_COMMAND"])
+                m = _re.search(r'-i\s+"?([^"\s]+)"?', env["GIT_SSH_COMMAND"])
                 if m:
+                    key_file = m.group(1)
+                    # Resolve forward-slash path back to native for unlink
                     try:
-                        os.unlink(m.group(1))
+                        os.unlink(key_file)
                     except OSError:
                         pass
         if proc.returncode != 0:
@@ -1054,15 +1303,11 @@ class ProcessManager:
         "usage limit",
         "rate limit",
         "rate_limit",
-        "429",
         "quota exceeded",
         "too many requests",
         "overloaded",
-        "capacity",
-        "try again",
-        "credit",
         "insufficient_quota",
-        "billing",
+        "billing hard limit",
     ]
     # Patterns indicating the agent's API is unreachable/misconfigured —
@@ -1074,9 +1319,11 @@ class ProcessManager:
         "connection refused",
         "connection reset",
         "connection timed out",
+        "connection error",
         "name or service not known",
         "no such host",
         "network is unreachable",
+        "api error",
     ]
     def __init__(self):
@@ -1127,8 +1374,12 @@ class ProcessManager:
                 elif isinstance(err, str):
                     error_messages.append(err)
             elif ev_type == "result":
-                has_result = True
-                has_meaningful_content = True
+                if data.get("is_error"):
+                    err_text = str(data.get("result", "") or data.get("error", "") or "result marked as error")
+                    error_messages.append(err_text)
+                else:
+                    has_result = True
+                    has_meaningful_content = True
             elif ev_type == "error":
                 msg = data.get("message", "")
                 if msg:
@@ -1170,13 +1421,25 @@ class ProcessManager:
         Returns True for rate/quota limits AND API unavailability errors,
         since a different agent (using a different API backend) may succeed.
+        IMPORTANT: Only checks stderr, error message, and the tail of stdout.
+        The full stdout contains the agent's work output (e.g., analysis text
+        about APIs, retry logic, HTTP status codes) which naturally contains
+        patterns like "429", "try again", "capacity" — these are NOT indicators
+        of the agent CLI itself being rate-limited.
         """
         if result.status == "success":
             return False
-        combined = (result.stdout + result.stderr + result.error).lower()
+        # Search error channels: stderr (CLI errors) + error message + tail of stdout
+        # (last 3000 chars catches any CLI-level error at the end of output)
+        error_text = (
+            (result.stderr or "")
+            + "\n" + (result.error or "")
+            + "\n" + (result.stdout or "")[-3000:]
+        ).lower()
         return (
-            any(p in combined for p in ProcessManager.RATE_LIMIT_PATTERNS)
-            or any(p in combined for p in ProcessManager.AGENT_UNAVAILABLE_PATTERNS)
+            any(p in error_text for p in ProcessManager.RATE_LIMIT_PATTERNS)
+            or any(p in error_text for p in ProcessManager.AGENT_UNAVAILABLE_PATTERNS)
         )
     @staticmethod
@@ -1193,8 +1456,16 @@ class ProcessManager:
         if result.status != "success":
             return None
-        combined = "\n".join(part for part in (result.stdout, result.stderr, result.error) if part)
-        pattern_failure = ProcessManager._has_failure_pattern(combined)
+        # For rate/unavailability pattern detection, only check error channels
+        # (stderr, error field) plus the TAIL of stdout.  The full stdout contains
+        # the agent's work output (analysis text, generated docs) which naturally
+        # mentions terms like "rate limit", "429", "capacity", "credit" etc.
+        error_channels = (
+            (result.stderr or "")
+            + "\n" + (result.error or "")
+            + "\n" + (result.stdout or "")[-3000:]
+        )
+        pattern_failure = ProcessManager._has_failure_pattern(error_channels)
         if pattern_failure:
             return pattern_failure
@@ -1308,18 +1579,23 @@ class ProcessManager:
         return normalized
     def _required_deliverable_paths(self, task: TaskInfo) -> set[str]:
-        output_dir = str((task.input_data or {}).get("output_dir", "") or "")
+        # For analysis nodes, deliverables live in analysis_output_dir (docs/requirements/...)
+        # For other nodes, use output_dir (docs/implements/...)
+        if task.node_type == "analysis":
+            output_dir = str(
+                (task.input_data or {}).get("analysis_output_dir", "")
+                or (task.input_data or {}).get("output_dir", "")
+                or ""
+            )
+        else:
+            output_dir = str((task.input_data or {}).get("output_dir", "") or "")
         output_dir = output_dir.replace("\\", "/").lstrip("./").rstrip("/")
         if not output_dir:
             return set()
         if task.node_type == "analysis":
             req_type = (task.input_data or {}).get("requirement_type", "feature")
-            try:
-                from app.services.type_workflow_profiles import get_profile
-                required_files = list(get_profile(req_type).analysis_outputs)
-            except Exception:
-                required_files = ["PRD.md", "SDD.md", "TASKS.md", "analysis.json", "test-intent.json"]
+            required_files = _get_analysis_outputs_for_type(req_type)
         elif task.node_type == "design":
             required_files = ["design.md"]
         else:
@@ -1330,7 +1606,8 @@ class ProcessManager:
     def _has_required_deliverable_updates(self, task: TaskInfo, *path_lists: list[str] | None) -> bool:
         required_paths = self._required_deliverable_paths(task)
         if not required_paths:
-            return False
+            # Cannot determine required deliverables — skip check (don't fail)
+            return True
         changed_paths: set[str] = set()
         for paths in path_lists:
@@ -1422,10 +1699,19 @@ class ProcessManager:
             while True:
                 try:
                     line_bytes = await proc.stdout.readline()
-                except ValueError:
-                    # Line exceeded stream buffer limit – fall back to reading
-                    # remaining data in bulk to avoid losing output.
-                    remaining = await proc.stdout.read()
+                except (ValueError, asyncio.LimitOverrunError, Exception) as exc:
+                    # Line exceeded stream buffer limit (LimitOverrunError
+                    # converted to ValueError by readline(), but catch broadly
+                    # to handle edge cases in different Python versions).
+                    # Fall back to reading remaining data in bulk.
+                    logger.warning(
+                        "Stream read error for task %s (%s: %s), draining remaining output",
+                        task_id, type(exc).__name__, exc,
+                    )
+                    try:
+                        remaining = await proc.stdout.read()
+                    except Exception:
+                        remaining = b""
                     if remaining:
                         for chunk_line in remaining.decode(errors="replace").split("\n"):
                             if chunk_line:
@@ -1481,7 +1767,7 @@ class ProcessManager:
         cmd = [
             agent.command,
             "-p",
-            "--output-format", "json",
+            "--output-format", "stream-json",
             "--verbose",
             "--dangerously-skip-permissions",
         ]
@@ -1499,7 +1785,7 @@ class ProcessManager:
                 stdin=asyncio.subprocess.PIPE,
                 cwd=str(cwd),
                 env=env,
-                limit=10 * 1024 * 1024,  # 10MB line buffer for large JSON output
+                limit=100 * 1024 * 1024,  # 100MB line buffer for large JSON output from long sessions
             )
             self.active_processes[task_id] = proc
             stdout, stderr, returncode = await self._stream_process(
@@ -1533,6 +1819,17 @@ class ProcessManager:
                 status="failed", exit_code=-1, stdout="", stderr="",
                 error=f"Timed out after {timeout}s",
             )
+        except Exception as exc:
+            logger.exception("Claude stream error for task %s", task_id)
+            if task_id in self.active_processes:
+                try:
+                    self.active_processes[task_id].kill()
+                except Exception:
+                    pass
+            return TaskResult(
+                status="failed", exit_code=-1, stdout="", stderr="",
+                error=f"Stream processing error: {exc}",
+            )
         finally:
             self.active_processes.pop(task_id, None)
@@ -1600,7 +1897,7 @@ class ProcessManager:
                 stderr=asyncio.subprocess.PIPE,
                 stdin=asyncio.subprocess.PIPE if stdin_input else None,
                 cwd=str(cwd),
-                limit=10 * 1024 * 1024,  # 10MB line buffer for large agent output
+                limit=100 * 1024 * 1024,  # 100MB line buffer for large agent output
             )
             self.active_processes[task_id] = proc
             stdin_bytes = stdin_input.encode() if stdin_input else None
@@ -1622,6 +1919,17 @@ class ProcessManager:
                 status="failed", exit_code=-1, stdout="", stderr="",
                 error=f"Timed out after {timeout}s",
             )
+        except Exception as exc:
+            logger.exception("CLI stream error for task %s", task_id)
+            if task_id in self.active_processes:
+                try:
+                    self.active_processes[task_id].kill()
+                except Exception:
+                    pass
+            return TaskResult(
+                status="failed", exit_code=-1, stdout="", stderr="",
+                error=f"Stream processing error: {exc}",
+            )
         finally:
             self.active_processes.pop(task_id, None)
@@ -1663,6 +1971,9 @@ class ProcessManager:
                 data = json.loads(stdout.strip())
                 if isinstance(data, dict):
                     parsed.append(data)
+                elif isinstance(data, list):
+                    # Handle JSON array (from --output-format json)
+                    parsed.extend(d for d in data if isinstance(d, dict))
             except (json.JSONDecodeError, ValueError):
                 pass
@@ -2139,6 +2450,8 @@ class HeartbeatService:
                         "available_agents": self._agents,
                         "system_metrics": self._collect_system_metrics(),
                         "os_info": get_os_info(),
+                        "daemon_version": DAEMON_VERSION,
+                        "client_type": _CLIENT_TYPE,
                     },
                     timeout=10,
                 )
@@ -2167,6 +2480,76 @@ class HeartbeatService:
             return {}
+# ── Log Uploader ──
+class LogUploader:
+    """Periodically uploads daemon log tail to the server for remote viewing."""
+    LOG_UPLOAD_INTERVAL = 300  # Upload every 5 minutes
+    LOG_TAIL_LINES = 500  # Last N lines to upload
+    def __init__(
+        self,
+        client: httpx.AsyncClient,
+        server_url: str,
+        runtime_id: str,
+    ):
+        self.client = client
+        self.server_url = server_url.rstrip("/")
+        self.runtime_id = runtime_id
+        self._task: asyncio.Task | None = None
+    async def start(self):
+        self._task = asyncio.create_task(self._loop())
+    async def stop(self):
+        if self._task:
+            self._task.cancel()
+            try:
+                await self._task
+            except asyncio.CancelledError:
+                pass
+    async def _loop(self):
+        # Initial upload after 30s delay (let daemon stabilize first)
+        await asyncio.sleep(30)
+        while True:
+            try:
+                await self._upload()
+            except asyncio.CancelledError:
+                raise
+            except Exception as e:
+                logger.warning("Log upload error: %s", e)
+            await asyncio.sleep(self.LOG_UPLOAD_INTERVAL)
+    async def _upload(self):
+        """Read daemon log tail and upload to server."""
+        try:
+            if not DAEMON_LOG_PATH.exists():
+                return
+            # Read last N lines efficiently
+            with open(DAEMON_LOG_PATH, "rb") as f:
+                # Seek from end to find last N lines
+                f.seek(0, 2)
+                file_size = f.tell()
+                # Read at most 100KB from end
+                read_size = min(file_size, 100 * 1024)
+                f.seek(file_size - read_size)
+                content = f.read().decode("utf-8", errors="replace")
+            # Take last N lines
+            lines = content.split("\n")
+            tail = "\n".join(lines[-self.LOG_TAIL_LINES:])
+            await self.client.post(
+                f"{self.server_url}/api/v1/runtimes/{self.runtime_id}/logs",
+                json={"log_tail": tail, "log_lines": self.LOG_TAIL_LINES},
+                timeout=15,
+            )
+        except Exception as e:
+            logger.warning("Failed to upload daemon log: %s", e)
 # ── Task Poller ──
@@ -2253,6 +2636,7 @@ class ServerConnection:
         self.heartbeat: HeartbeatService | None = None
         self.poller: TaskPoller | None = None
         self.reporter: ProgressReporter | None = None
+        self.log_uploader: LogUploader | None = None
         self._auth_failures = 0  # Consecutive auth failure count
         self._max_auth_failures = 3  # Trigger re-registration after this many
         # Short label for logging
@@ -2295,6 +2679,8 @@ class ServerConnection:
                 self.poller.runtime_id = self.runtime_id
             if self.reporter and self.runtime_id:
                 self.reporter.runtime_id = self.runtime_id
+            if self.log_uploader and self.runtime_id:
+                self.log_uploader.runtime_id = self.runtime_id
             self._auth_failures = 0
             logger.info("[%s] Re-registered successfully after token refresh", self.label)
         except Exception as e:
@@ -2321,6 +2707,8 @@ class ServerConnection:
                     "hardware_id": self.hardware_id,
                     "device_name": platform.node(),
                     "os_info": get_os_info(),
+                    "daemon_version": DAEMON_VERSION,
+                    "client_type": _CLIENT_TYPE,
                     "available_agents": agent_dicts,
                     "max_concurrent_tasks": max_concurrent,
                     "capabilities": {
@@ -2391,15 +2779,22 @@ class ServerConnection:
         self.reporter = ProgressReporter(
             self.client, self.server_url, self.runtime_id,
         )
+        self.log_uploader = LogUploader(
+            self.client, self.server_url, self.runtime_id,
+        )
     async def start_heartbeat(self):
         if self.heartbeat:
             await self.heartbeat.start()
+        if self.log_uploader:
+            await self.log_uploader.start()
     async def stop(self):
         """Stop heartbeat and unregister."""
         if self.heartbeat:
             await self.heartbeat.stop()
+        if self.log_uploader:
+            await self.log_uploader.stop()
         if self.runtime_id:
             try:
                 # Use deregister endpoint (no admin required) instead of DELETE
@@ -2499,40 +2894,118 @@ class RuntimeDaemon:
         CLI starts, etc.
         """
         lock_path = Path.home() / ".forgexa" / "daemon" / "daemon.lock"
+        pid_path = Path.home() / ".forgexa" / "daemon" / "daemon.pid"
         lock_path.parent.mkdir(parents=True, exist_ok=True)
         if sys.platform == "win32":
-            # Windows: use msvcrt file locking
+            # Windows: use msvcrt file locking.
+            #
+            # IMPORTANT: msvcrt.locking() creates mandatory byte-range locks
+            # that prevent OTHER processes from reading the locked bytes.
+            # Therefore we store the PID in a separate daemon.pid file that
+            # is never locked, so we can always read the old daemon's PID.
             import msvcrt
+            # ── Step 1: read old PID BEFORE touching the lock file ──
+            old_pid = None
+            try:
+                if pid_path.exists():
+                    old_pid = int(pid_path.read_text().strip())
+            except (ValueError, OSError):
+                pass
+            # ── Step 2: try to acquire the lock ──
             self._lock_file = open(lock_path, "w")
             try:
                 msvcrt.locking(self._lock_file.fileno(), msvcrt.LK_NBLCK, 1)
             except (IOError, OSError):
-                # Lock held — try to kill old process via PID file
-                try:
-                    old_pid = int(lock_path.read_text().strip())
-                    logger.warning("Another daemon is running (PID %d). Terminating...", old_pid)
-                    import subprocess as _sp
-                    _sp.run(["taskkill", "/PID", str(old_pid), "/F"],
+                # Lock held by another daemon — kill it
+                import subprocess as _sp
+                if old_pid and old_pid != os.getpid():
+                    logger.warning("Another daemon (PID %d) holds the lock. Killing...", old_pid)
+                    _sp.run(["taskkill", "/PID", str(old_pid), "/F", "/T"],
                             capture_output=True)
-                    time.sleep(1)
-                except (ValueError, FileNotFoundError, PermissionError, OSError):
-                    pass
+                else:
+                    # No daemon.pid or PID matches us — find by process enumeration.
+                    # Uses PowerShell Get-CimInstance (reliable on all modern Windows).
+                    # wmic is deprecated since Windows 10 21H2 / Windows 11.
+                    logger.warning("No daemon PID file; killing daemon by process enumeration...")
+                    try:
+                        ps_script = (
+                            "Get-CimInstance Win32_Process | "
+                            "Where-Object { "
+                            "($_.CommandLine -like '*daemon.py*' -or $_.Name -eq 'forgexa-daemon.exe') "
+                            "-and $_.ProcessId -ne " + str(os.getpid()) + " } | "
+                            "Select-Object -ExpandProperty ProcessId"
+                        )
+                        result = _sp.run(
+                            ["powershell", "-NoProfile", "-NonInteractive", "-Command", ps_script],
+                            capture_output=True, text=True, timeout=15)
+                        for line in result.stdout.strip().splitlines():
+                            line = line.strip()
+                            if line.isdigit():
+                                pid = int(line)
+                                if pid != os.getpid():
+                                    logger.info("Killing orphan daemon process (PID %d)", pid)
+                                    _sp.run(["taskkill", "/PID", str(pid), "/F", "/T"],
+                                            capture_output=True)
+                    except Exception as e:
+                        logger.debug("Process enumeration fallback failed: %s", e)
+                # Wait for process to fully terminate and release file handles.
+                # Verify death before proceeding (Windows needs time to release handles).
+                time.sleep(2)
+                if old_pid and old_pid != os.getpid():
+                    for _ in range(6):  # Up to 3 more seconds
+                        try:
+                            result = _sp.run(
+                                ["tasklist", "/FI", f"PID eq {old_pid}", "/NH", "/FO", "CSV"],
+                                capture_output=True, text=True, timeout=5)
+                            if str(old_pid) not in result.stdout:
+                                break
+                        except Exception:
+                            break
+                        time.sleep(0.5)
-                # Retry
+                # Close our handle and remove stale lock file
                 self._lock_file.close()
-                self._lock_file = open(lock_path, "w")
                 try:
-                    msvcrt.locking(self._lock_file.fileno(), msvcrt.LK_NBLCK, 1)
-                except (IOError, OSError):
+                    lock_path.unlink(missing_ok=True)
+                except OSError:
+                    pass
+                # Retry with backoff — up to 5 attempts (total ~15s)
+                acquired = False
+                for attempt in range(5):
+                    try:
+                        self._lock_file = open(lock_path, "w")
+                        msvcrt.locking(self._lock_file.fileno(), msvcrt.LK_NBLCK, 1)
+                        acquired = True
+                        break
+                    except (IOError, OSError):
+                        self._lock_file.close()
+                        wait = (attempt + 1) * 1  # 1s, 2s, 3s, 4s, 5s
+                        logger.warning("Lock retry %d/5 failed, waiting %ds...", attempt + 1, wait)
+                        time.sleep(wait)
+                if not acquired:
                     logger.error("Cannot acquire daemon lock — another instance may still be running")
                     raise SystemExit(1)
+            # Write PID to lock file (for reference, though unreadable while locked)
             self._lock_file.seek(0)
             self._lock_file.truncate()
             self._lock_file.write(str(os.getpid()))
             self._lock_file.flush()
+            # Write PID to separate unlocked file — always readable by other
+            # processes (Rust manager, NSIS installer, next daemon instance).
+            try:
+                pid_path.write_text(str(os.getpid()))
+            except OSError as e:
+                logger.warning("Could not write daemon.pid: %s", e)
             logger.info("Acquired exclusive daemon lock (pid=%d)", os.getpid())
             return
@@ -2582,6 +3055,12 @@ class RuntimeDaemon:
         self._lock_file.flush()
         logger.info("Acquired exclusive daemon lock (pid=%d)", os.getpid())
+        # Write PID to the separate daemon.pid file too, mirroring the Windows code branch
+        try:
+            pid_path.write_text(str(os.getpid()))
+        except OSError:
+            pass
         # Also clean up CLI daemon PID file if it points to a dead process
         cli_pid_file = Path.home() / ".forgexa-daemon.pid"
         if cli_pid_file.exists():
@@ -2623,20 +3102,37 @@ class RuntimeDaemon:
                        ", ".join(a.agent_id for a in self.agents))
         # 2. Register with all servers
-        for url in self.server_urls:
-            conn = ServerConnection(url, self.api_token, self.daemon_id, self.hardware_id)
-            try:
-                await conn.register(self.agents, self.max_concurrent)
-                conn.start_services(self.heartbeat_interval, self.poll_interval, self.agents)
-                await conn.start_heartbeat()
-                self.connections.append(conn)
-                logger.info("[%s] Connected and ready", conn.label)
-            except Exception as e:
-                logger.error("[%s] Failed to connect: %s — skipping this server", conn.label, e)
-                await conn.client.aclose()
+        # Retry the whole registration pass on transient failures until at least one server connects
+        max_registration_attempts = 5
+        for attempt in range(max_registration_attempts):
+            for url in self.server_urls:
+                if any(c.server_url == url.rstrip("/") for c in self.connections):
+                    continue  # Already connected to this server
+                conn = ServerConnection(url, self.api_token, self.daemon_id, self.hardware_id)
+                try:
+                    await conn.register(self.agents, self.max_concurrent)
+                    conn.start_services(self.heartbeat_interval, self.poll_interval, self.agents)
+                    await conn.start_heartbeat()
+                    self.connections.append(conn)
+                    logger.info("[%s] Connected and ready", conn.label)
+                except Exception as e:
+                    logger.error("[%s] Failed to connect: %s — skipping this server", conn.label, e)
+                    await conn.client.aclose()
+            if self.connections:
+                break  # At least one server connected
+            if attempt < max_registration_attempts - 1:
+                wait = (attempt + 1) * 5  # 5s, 10s, 15s, 20s
+                logger.warning(
+                    "No servers reachable (attempt %d/%d). Retrying in %ds...",
+                    attempt + 1, max_registration_attempts, wait,
+                )
+                await asyncio.sleep(wait)
         if not self.connections:
-            logger.error("Failed to connect to any server. Exiting.")
+            logger.error("Failed to connect to any server after %d attempts. Exiting.",
+                         max_registration_attempts)
             raise SystemExit(1)
         logger.info("Daemon ready. Connected to %d server(s). Polling for tasks...",
@@ -2730,15 +3226,15 @@ class RuntimeDaemon:
                 _line_buffer.extend(lines)
             async def _progress_ticker():
-                """Flush buffered output lines + update progress % every 10 s."""
+                """Flush buffered output lines + update progress % every 5 s."""
                 import math as _math
                 tick = 0
                 while not progress_stop.is_set():
-                    await asyncio.sleep(10)
+                    await asyncio.sleep(5)
                     if progress_stop.is_set():
                         break
                     tick += 1
-                    pct = min(int(10 + 80 * (1 - 1 / (1 + tick / 8))), 90)
+                    pct = min(int(10 + 80 * (1 - 1 / (1 + tick / 16))), 90)
                     pid = self.process_manager.active_processes.get(task.task_id)
                     step = "running_agent"
                     if pid:
@@ -2775,7 +3271,26 @@ class RuntimeDaemon:
             tried_agents.add(agent.agent_id)
             # ── Agent fallback: if agent hit rate limit or API is unavailable, try next agent ──
+            # Guard: if the agent already produced file changes in the workspace, it DID
+            # meaningful work — don't trigger fallback even if it crashed after completing.
+            # Let the recovery logic (step 4.1) handle non-zero exit with committed work.
+            _skip_fallback = False
             if self.process_manager.is_rate_limited(result):
+                _pre_fallback_git = await self.process_manager._collect_git_info(workspace_path)
+                _pre_fallback_committed = await self.process_manager._collect_git_info_vs_parent(workspace_path)
+                has_workspace_changes = (
+                    bool(_pre_fallback_git.get("files_changed"))
+                    or bool(_pre_fallback_committed.get("files_changed"))
+                )
+                if has_workspace_changes:
+                    logger.info(
+                        "Agent '%s' exited non-zero for task %s but workspace has changes — "
+                        "skipping fallback, proceeding to recovery",
+                        agent.agent_id, task.task_id,
+                    )
+                    _skip_fallback = True
+            if self.process_manager.is_rate_limited(result) and not _skip_fallback:
                 logger.warning(
                     "Agent '%s' unavailable/rate-limited for task %s, attempting fallback",
                     agent.agent_id, task.task_id,
@@ -2805,11 +3320,11 @@ class RuntimeDaemon:
                     async def _progress_ticker2():
                         tick = 0
                         while not progress_stop2.is_set():
-                            await asyncio.sleep(10)
+                            await asyncio.sleep(5)
                             if progress_stop2.is_set():
                                 break
                             tick += 1
-                            pct = min(int(10 + 80 * (1 - 1 / (1 + tick / 8))), 90)
+                            pct = min(int(10 + 80 * (1 - 1 / (1 + tick / 16))), 90)
                             pid = self.process_manager.active_processes.get(task.task_id)
                             step = f"running_agent:{agent.agent_id}"
                             if pid:
@@ -2922,13 +3437,33 @@ class RuntimeDaemon:
             # Existing files from a prior iteration are not sufficient evidence.
             if result.status == "success" and task.node_type in ("analysis", "design"):
                 committed_git = await self.process_manager._collect_git_info_vs_parent(workspace_path)
-                if not self._has_required_deliverable_updates(
+                git_check_passed = self.process_manager._has_required_deliverable_updates(
                     task,
                     pre_commit_git.get("files_changed"),
                     committed_git.get("files_changed"),
                     result.files_changed,
                     (result.git or {}).get("files_changed"),
-                ):
+                )
+                # Fallback: if git-based check fails (e.g., agent auto-committed and
+                # merge-base detection failed), verify files physically exist on disk.
+                # This prevents false failures when git state is unusual but files
+                # are actually present.
+                if not git_check_passed:
+                    required_paths = self.process_manager._required_deliverable_paths(task)
+                    if required_paths:
+                        files_exist = all(
+                            (workspace_path / p).exists() and (workspace_path / p).stat().st_size > 0
+                            for p in required_paths
+                        )
+                        if files_exist:
+                            logger.info(
+                                "Task %s (%s): git diff did not show deliverables but all %d "
+                                "files exist on disk — accepting as success",
+                                task.task_id, task.node_type, len(required_paths),
+                            )
+                            git_check_passed = True
+                if not git_check_passed:
                     logger.warning(
                         "Task %s (%s) reported success but did not update required deliverables",
                         task.task_id, task.node_type,
@@ -2955,6 +3490,16 @@ class RuntimeDaemon:
                 if commit_result:
                     # Propagate push/commit errors in metrics so they're visible
                     result.metrics.update(commit_result)
+                    # Push failure is a real problem for downstream nodes — mark
+                    # as failed so the orchestrator can retry (transient network).
+                    if commit_result.get("push_error"):
+                        push_err = commit_result["push_error"]
+                        logger.error(
+                            "Task %s: push failed, marking as failed so retry can attempt push again: %s",
+                            task.task_id, push_err,
+                        )
+                        result.status = "failed"
+                        result.error = f"Git push failed: {push_err}"
                 # Re-collect git info after commit (compare with parent)
                 post_commit_git = await self.process_manager._collect_git_info_vs_parent(workspace_path)
                 # Merge: use the pre-commit file list if post-commit is empty
@@ -3057,15 +3602,13 @@ class RuntimeDaemon:
         if node_type == "analysis":
             # Use type profile to determine required analysis outputs
-            try:
-                from app.services.type_workflow_profiles import get_profile
-                profile = get_profile(req_type)
-                required_files = profile.analysis_outputs
-            except Exception:
-                # Fallback to full set if profile import fails
-                required_files = ["PRD.md", "SDD.md", "TASKS.md", "analysis.json", "test-intent.json"]
+            required_files = _get_analysis_outputs_for_type(req_type)
-            doc_dir = (task.input_data or {}).get("output_dir", "")
+            # Analysis deliverables live in analysis_output_dir (docs/requirements/...)
+            doc_dir = (
+                (task.input_data or {}).get("analysis_output_dir", "")
+                or (task.input_data or {}).get("output_dir", "")
+            )
             if doc_dir:
                 base = workspace_path / doc_dir
             else:
@@ -3300,20 +3843,25 @@ class RuntimeDaemon:
         always receives the file contents via the completion report and gate
         reviewers can see the analysis documents immediately.
         """
-        doc_dir = (task.input_data or {}).get("output_dir", "")
+        # Analysis deliverables live in analysis_output_dir (docs/requirements/...)
+        doc_dir = (
+            (task.input_data or {}).get("analysis_output_dir", "")
+            or (task.input_data or {}).get("output_dir", "")
+        )
         if not doc_dir:
             return
         base = workspace_path / doc_dir.lstrip("./")
-        _ANALYSIS_FILES = ("PRD.md", "SDD.md", "TASKS.md", "analysis.json", "test-intent.json")
-        existing_artifact_paths = {a.get("path", "") for a in result.artifacts}
+        req_type = (task.input_data or {}).get("requirement_type", "feature")
+        _ANALYSIS_FILES = _get_analysis_outputs_for_type(req_type)
+        existing_artifact_paths = {a.get("path", "").replace("\\", "/") for a in result.artifacts}
         for fname in _ANALYSIS_FILES:
             fpath = base / fname
             if not fpath.exists() or fpath.stat().st_size == 0:
                 continue
             try:
-                rel_path = str(fpath.relative_to(workspace_path))
+                rel_path = str(fpath.relative_to(workspace_path)).replace("\\", "/")
                 if rel_path in existing_artifact_paths:
                     continue  # already attached
                 content = fpath.read_text(encoding="utf-8", errors="replace")
@@ -3343,13 +3891,13 @@ class RuntimeDaemon:
             return
         base = workspace_path / doc_dir.lstrip("./")
-        existing_artifact_paths = {a.get("path", "") for a in result.artifacts}
+        existing_artifact_paths = {a.get("path", "").replace("\\", "/") for a in result.artifacts}
         design_path = base / "design.md"
         if not design_path.exists() or design_path.stat().st_size == 0:
             return
         try:
-            rel_path = str(design_path.relative_to(workspace_path))
+            rel_path = str(design_path.relative_to(workspace_path)).replace("\\", "/")
             if rel_path in existing_artifact_paths:
                 return
             content = design_path.read_text(encoding="utf-8", errors="replace")
@@ -3411,6 +3959,8 @@ class RuntimeDaemon:
                     or task.input_data.get("title")
                     or ""
                 )
+                if not isinstance(wi_title, str):
+                    wi_title = str(wi_title)
                 req_key = task.requirement_key or task.work_item.get("requirement_key") or ""
                 if req_key and wi_title:
                     display_title = f"{req_key}: {wi_title}"
@@ -3421,11 +3971,15 @@ class RuntimeDaemon:
                 else:
                     display_title = task.task_id
-                commit_msg = await self._build_auto_commit_message(
-                    display_title, task.task_id, task.node_type,
-                    task.agent_type, change_summary,
-                    workspace_path=workspace_path,
-                )
+                try:
+                    commit_msg = await self._build_auto_commit_message(
+                        display_title, task.task_id, task.node_type,
+                        task.agent_type, change_summary,
+                        workspace_path=workspace_path,
+                    )
+                except Exception as msg_err:
+                    logger.warning("Failed to build rich commit message: %s — using fallback", msg_err)
+                    commit_msg = f"{task.node_type}({task.requirement_key or task.task_id}): {display_title}"
                 proc = await asyncio.create_subprocess_exec(
                     "git", "commit", "-m", commit_msg,
                     cwd=str(workspace_path),
@@ -3622,7 +4176,22 @@ class RuntimeDaemon:
         lines: list[str] = []
         # Summary — word-wrap at 78 chars
-        summary = (data.get("summary") or "").strip()
+        raw_summary = data.get("summary")
+        if isinstance(raw_summary, dict):
+            # Some agents produce summary as a structured object; extract description
+            summary = (
+                raw_summary.get("description")
+                or raw_summary.get("title")
+                or raw_summary.get("summary")
+                or ""
+            )
+            if not isinstance(summary, str):
+                summary = str(summary) if summary else ""
+        elif isinstance(raw_summary, str):
+            summary = raw_summary
+        else:
+            summary = str(raw_summary) if raw_summary else ""
+        summary = summary.strip()
         if summary:
             words = summary.split()
             current = ""

{forgexa_cli-1.3.2 → forgexa_cli-1.4.2}/forgexa_cli.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: forgexa-cli
-Version: 1.3.2
+Version: 1.4.2
 Summary: Forgexa CLI — command-line client and AI agent runtime for the Forgexa platform
 Author-email: Jason Sun <dev.winds@gmail.com>
 License: MIT

{forgexa_cli-1.3.2 → forgexa_cli-1.4.2}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "forgexa-cli"
-version = "1.3.2"
+version = "1.4.2"
 description = "Forgexa CLI — command-line client and AI agent runtime for the Forgexa platform"
 requires-python = ">=3.9"
 license = { text = "MIT" }