forgexa-cli 1.3.2__tar.gz → 1.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {forgexa_cli-1.3.2 → forgexa_cli-1.4.2}/PKG-INFO +1 -1
- {forgexa_cli-1.3.2 → forgexa_cli-1.4.2}/forgexa_cli/__init__.py +1 -1
- {forgexa_cli-1.3.2 → forgexa_cli-1.4.2}/forgexa_cli/daemon.py +660 -91
- {forgexa_cli-1.3.2 → forgexa_cli-1.4.2}/forgexa_cli.egg-info/PKG-INFO +1 -1
- {forgexa_cli-1.3.2 → forgexa_cli-1.4.2}/pyproject.toml +1 -1
- {forgexa_cli-1.3.2 → forgexa_cli-1.4.2}/README.md +0 -0
- {forgexa_cli-1.3.2 → forgexa_cli-1.4.2}/forgexa_cli/_build_config.py +0 -0
- {forgexa_cli-1.3.2 → forgexa_cli-1.4.2}/forgexa_cli/main.py +0 -0
- {forgexa_cli-1.3.2 → forgexa_cli-1.4.2}/forgexa_cli/py.typed +0 -0
- {forgexa_cli-1.3.2 → forgexa_cli-1.4.2}/forgexa_cli.egg-info/SOURCES.txt +0 -0
- {forgexa_cli-1.3.2 → forgexa_cli-1.4.2}/forgexa_cli.egg-info/dependency_links.txt +0 -0
- {forgexa_cli-1.3.2 → forgexa_cli-1.4.2}/forgexa_cli.egg-info/entry_points.txt +0 -0
- {forgexa_cli-1.3.2 → forgexa_cli-1.4.2}/forgexa_cli.egg-info/requires.txt +0 -0
- {forgexa_cli-1.3.2 → forgexa_cli-1.4.2}/forgexa_cli.egg-info/top_level.txt +0 -0
- {forgexa_cli-1.3.2 → forgexa_cli-1.4.2}/setup.cfg +0 -0
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
"""forgexa-cli — Forgexa command-line client."""
|
|
2
|
-
__version__ = "1.3.2"
|
|
2
|
+
__version__ = "1.4.2"
|
|
@@ -37,16 +37,198 @@ try:
|
|
|
37
37
|
except ImportError:
|
|
38
38
|
fcntl = None # type: ignore[assignment]
|
|
39
39
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
40
|
+
# ── httpx dependency — robust auto-install for standalone environments ──
|
|
41
|
+
# When running inside the backend package, httpx is a declared dependency and
|
|
42
|
+
# always available. In standalone contexts (desktop AppImage/DMG/MSI, CLI
|
|
43
|
+
# without [daemon] extra), httpx may be missing. We try multiple strategies:
|
|
44
|
+
#
|
|
45
|
+
# 1. Direct import (works for backend & CLI[daemon])
|
|
46
|
+
# 2. Import from cached deps dir (~/.forgexa/daemon/deps)
|
|
47
|
+
# 3. Auto-install via pip --target to the cached deps dir
|
|
48
|
+
# (bypasses PEP 668 / externally-managed-environment on modern distros)
|
|
49
|
+
# 4. Friendly error with OS-specific instructions if all else fails
|
|
50
|
+
_HTTPX_DEPS_DIR = os.path.join(str(Path.home()), ".forgexa", "daemon", "deps")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _try_install_httpx(deps_dir: str) -> tuple[bool, str]:
    """Try to install httpx (and httpcore) so the daemon can import them.

    Strategies are attempted in order until one exits with status 0:

    1. ``pip install --target <deps_dir>`` (skipped when *deps_dir* cannot
       be created)
    2. ``pip install --user``
    3. ``pip install --break-system-packages``

    httpcore is listed explicitly next to httpx so that it is installed into
    the same location instead of being considered "already satisfied" by a
    copy that is not importable from there.

    Args:
        deps_dir: Directory used as the ``--target`` of the first strategy.

    Returns:
        ``(True, "")`` as soon as one strategy succeeds, otherwise
        ``(False, detail)`` where *detail* is a single line describing the
        failure of every strategy that was attempted, in order.
    """
    python = sys.executable or "python3"
    packages = ["httpx>=0.24", "httpcore"]
    timeout_seconds = 120
    target_label = "pip install --target (isolated deps)"

    failures: list[str] = []
    strategies: list[tuple[str, list[str]]] = []

    # An unwritable home directory must not abort the import of this module
    # with a traceback: record the problem and fall through to the
    # strategies that do not need deps_dir.
    try:
        os.makedirs(deps_dir, exist_ok=True)
    except OSError as exc:
        failures.append(f"[{target_label}] cannot create {deps_dir}: {exc}")
    else:
        strategies.append((
            target_label,
            [python, "-m", "pip", "install", "--target", deps_dir,
             "--quiet", "--upgrade", *packages],
        ))
    strategies.append((
        "pip install --user",
        [python, "-m", "pip", "install", "--user", "--quiet", *packages],
    ))
    strategies.append((
        "pip install --break-system-packages",
        [python, "-m", "pip", "install", "--quiet",
         "--break-system-packages", *packages],
    ))

    for label, cmd in strategies:
        try:
            result = subprocess.run(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
                text=True,
                timeout=timeout_seconds,
            )
        except FileNotFoundError:
            failures.append(f"[{label}] Python not found: {cmd[0]}")
            continue
        except subprocess.TimeoutExpired:
            failures.append(f"[{label}] timed out after {timeout_seconds}s")
            continue
        except Exception as exc:
            # Best effort: any other launch problem just moves on to the
            # next strategy.
            failures.append(f"[{label}] {type(exc).__name__}: {exc}")
            continue

        if result.returncode == 0:
            return True, ""

        detail = f"[{label}] exit code {result.returncode}"
        stderr_text = (result.stderr or "").strip()
        if stderr_text:
            # Keep last 5 lines of stderr for diagnostics
            detail += ": " + " | ".join(stderr_text.splitlines()[-5:])
        failures.append(detail)

    # Report every attempt, not only the last one: the final fallback often
    # fails for an unrelated reason and would hide the primary error.
    return False, "; ".join(failures)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _die_missing_httpx(detail: str) -> None:
    """Print a clear, actionable error and exit when httpx cannot be loaded.

    Writes a human-readable banner with platform-specific install commands
    to stderr, followed by a single ``DAEMON_ERROR:`` line, then terminates
    the process with exit status 1.

    Args:
        detail: Diagnostic text describing why the import/installation
            failed; it is echoed in both the banner and the summary line.

    Raises:
        SystemExit: always, with code 1.
    """
    os_name = platform.system()
    python_path = sys.executable or "(unknown)"

    # The extras specifier is double-quoted in every hint: unquoted square
    # brackets are a glob pattern in zsh and make the command fail there.
    # Double quotes are accepted by POSIX shells, cmd.exe and PowerShell.
    extras_hint_pip3 = 'pip3 install "forgexa-cli[daemon]"'
    hints_by_os = {
        "Linux": [
            "pip3 install --user httpx",
            "sudo apt install python3-httpx # Debian/Ubuntu",
            "sudo dnf install python3-httpx # Fedora/RHEL",
            extras_hint_pip3,
        ],
        "Darwin": [
            "pip3 install httpx",
            "brew install python3 && pip3 install httpx",
            extras_hint_pip3,
        ],
        "Windows": [
            "pip install httpx",
            'pip install "forgexa-cli[daemon]"',
        ],
    }
    hints = hints_by_os.get(os_name, ["pip3 install httpx", extras_hint_pip3])

    hint_lines = "\n".join(f" {h}" for h in hints)
    msg = (
        "\n"
        "┌─────────────────────────────────────────────────────────────────────┐\n"
        "│ Forgexa Daemon: missing required dependency 'httpx' │\n"
        "└─────────────────────────────────────────────────────────────────────┘\n"
        "\n"
        " The daemon requires the 'httpx' HTTP client library but it could\n"
        " not be imported, and automatic installation failed.\n"
        "\n"
        f" Python: {python_path}\n"
        f" Platform: {os_name} ({platform.machine()})\n"
        f" Detail: {detail}\n"
        "\n"
        " Please install it manually with one of these commands:\n"
        "\n"
        f"{hint_lines}\n"
        "\n"
        " Then restart the daemon.\n"
        "─────────────────────────────────────────────────────────────────────\n"
    )
    print(msg, file=sys.stderr)
    # Machine-readable summary for the desktop app to parse and show as a toast.
    print(f"DAEMON_ERROR: Missing required Python package 'httpx'. {detail}", file=sys.stderr)
    sys.exit(1)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _validate_httpx_imports() -> tuple[bool, str]:
    """Check that httpx and httpcore can both be imported.

    Both modules are imported eagerly so that a partial installation is
    detected up front, where it can be repaired or reported clearly,
    instead of surfacing later while the daemon is running.

    Returns:
        ``(True, "")`` when both imports succeed, otherwise
        ``(False, name)`` where *name* is the first module that failed.
    """
    import importlib

    for mod_name in ("httpx", "httpcore"):
        try:
            importlib.import_module(mod_name)
        except Exception:
            # Not only ImportError: a damaged or incompatible build can raise
            # anything while its module body executes (SyntaxError,
            # AttributeError, ...). Treat every failure as "not usable" so
            # the caller can reinstall or print its friendly error.
            return False, mod_name
    return True, ""
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
# Actual import sequence:
#   1. validate a direct import
#   2. retry with the cached deps directory on sys.path
#   3. auto-install, then validate once more
#   4. give up with a friendly error
import importlib as _importlib  # noqa: E402

_httpx_ok, _httpx_missing = _validate_httpx_imports()

if not _httpx_ok:
    # Check cached deps directory (previous auto-install)
    if _HTTPX_DEPS_DIR not in sys.path:
        sys.path.insert(0, _HTTPX_DEPS_DIR)
    _importlib.invalidate_caches()
    _httpx_ok, _httpx_missing = _validate_httpx_imports()

if not _httpx_ok:
    # If httpx is present but a sub-dependency (httpcore) is missing,
    # the deps directory has a partial/stale installation. Clear it and
    # purge cached modules so pip does a clean install with all transitive
    # dependencies.
    if _httpx_missing != "httpx":
        shutil.rmtree(_HTTPX_DEPS_DIR, ignore_errors=True)
        for _mod_key in list(sys.modules):
            if _mod_key in ("httpx", "httpcore") or \
                    _mod_key.startswith(("httpx.", "httpcore.")):
                del sys.modules[_mod_key]

    # Attempt auto-install to user-writable deps directory
    _ok, _err = _try_install_httpx(_HTTPX_DEPS_DIR)
    if _ok:
        if _HTTPX_DEPS_DIR not in sys.path:
            sys.path.insert(0, _HTTPX_DEPS_DIR)
        # The deps directory was already probed above, possibly before it
        # existed. The import system caches that result per path entry, so
        # freshly installed packages stay invisible until the finder caches
        # are invalidated.
        _importlib.invalidate_caches()
        _httpx_ok, _httpx_missing = _validate_httpx_imports()
        if not _httpx_ok:
            _die_missing_httpx(
                f"pip install succeeded but '{_httpx_missing}' still cannot "
                "be imported — check Python version compatibility"
            )
    else:
        _die_missing_httpx(_err)

import httpx  # noqa: E402 — guaranteed available after validation above

del _httpx_ok, _httpx_missing, _importlib
|
|
50
232
|
|
|
51
233
|
# ── Settings: graceful fallback when running standalone (outside backend package) ──
|
|
52
234
|
try:
|
|
@@ -117,6 +299,36 @@ except (ImportError, ModuleNotFoundError):
|
|
|
117
299
|
|
|
118
300
|
settings = _StandaloneSettings() # type: ignore[assignment]
|
|
119
301
|
|
|
302
|
+
# ── Daemon version and client type ────────────────────────────────────────
|
|
303
|
+
# DAEMON_VERSION is the protocol/logic version of the daemon code.
|
|
304
|
+
# Kept in sync with pyproject.toml version via bump-version.sh.
|
|
305
|
+
# CLIENT_TYPE identifies which packaging/distribution this daemon runs in.
|
|
306
|
+
DAEMON_VERSION = "1.4.2"
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def _detect_client_type() -> str:
    """Work out which kind of client this daemon is running as.

    Resolution order:
      1. The FORGEXA_CLIENT_TYPE environment variable, when it holds one of
         "server", "cli" or "desktop" (surrounding whitespace and letter
         case are ignored).
      2. "server" when the ``app.config`` module has already been imported.
      3. "cli" otherwise.

    Returns:
        One of "server", "cli" or "desktop".
    """
    requested = os.environ.get("FORGEXA_CLIENT_TYPE", "").strip().lower()
    if requested in ("server", "cli", "desktop"):
        return requested
    # No valid override: infer from whether the backend settings module
    # was loaded into this process.
    return "server" if "app.config" in sys.modules else "cli"
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
_CLIENT_TYPE = _detect_client_type()
|
|
331
|
+
|
|
120
332
|
# ── Logging — self-managed file handler ────────────────────────────────
|
|
121
333
|
# The daemon configures its own FileHandler so logs are written to
|
|
122
334
|
# ~/.forgexa/daemon/daemon.log regardless of how the daemon was launched
|
|
@@ -331,6 +543,31 @@ class TaskResult:
|
|
|
331
543
|
git: dict = field(default_factory=dict)
|
|
332
544
|
|
|
333
545
|
|
|
546
|
+
# ── Type-aware analysis outputs (inline fallback for standalone daemons) ──
|
|
547
|
+
# Mirrors type_workflow_profiles.py — used when import is unavailable (CLI/Desktop).
|
|
548
|
+
_ANALYSIS_OUTPUTS_BY_TYPE: dict[str, list[str]] = {
    "feature": ["PRD.md", "SDD.md", "TASKS.md", "analysis.json", "test-intent.json"],
    "bugfix": ["diagnosis.md", "TASKS.md", "analysis.json", "test-intent.json"],
    "refactor": ["refactor-plan.md", "TASKS.md", "analysis.json"],
    "documentation": ["outline.md", "analysis.json"],
    "improvement": ["improvement-spec.md", "TASKS.md", "analysis.json", "test-intent.json"],
    "task": ["task-plan.md", "analysis.json"],
}


def _get_analysis_outputs_for_type(req_type: str) -> list[str]:
    """Get expected analysis output files for a requirement type.

    Uses ``app.services.type_workflow_profiles.get_profile`` when that
    module can be imported and the lookup succeeds. On any failure it falls
    back to the inline ``_ANALYSIS_OUTPUTS_BY_TYPE`` table, using the
    "feature" entry for requirement types the table does not list.

    Args:
        req_type: Requirement type name, e.g. "feature" or "bugfix".

    Returns:
        A new list of file names; callers may modify it freely.
    """
    try:
        from app.services.type_workflow_profiles import get_profile
        return list(get_profile(req_type).analysis_outputs)
    except Exception:
        fallback = _ANALYSIS_OUTPUTS_BY_TYPE.get(
            req_type, _ANALYSIS_OUTPUTS_BY_TYPE["feature"]
        )
        # Copy, like the primary path does, so that a caller mutating the
        # result cannot corrupt the shared module-level table.
        return list(fallback)
|
|
569
|
+
|
|
570
|
+
|
|
334
571
|
# ── Agent Discovery ──
|
|
335
572
|
|
|
336
573
|
|
|
@@ -993,9 +1230,19 @@ class WorkspaceManager:
|
|
|
993
1230
|
os.write(fd, b"\n")
|
|
994
1231
|
os.close(fd)
|
|
995
1232
|
os.chmod(key_path, stat_mod.S_IRUSR)
|
|
1233
|
+
# On Windows, convert backslashes to forward slashes and quote
|
|
1234
|
+
# the path. Git invokes GIT_SSH_COMMAND via MSYS2 shell which
|
|
1235
|
+
# interprets backslashes as escape sequences, corrupting the
|
|
1236
|
+
# path (e.g. C:\Users → C:Users).
|
|
1237
|
+
key_path_safe = key_path.replace("\\", "/") if sys.platform == "win32" else key_path
|
|
996
1238
|
env = {
|
|
997
1239
|
**os.environ,
|
|
998
|
-
"GIT_SSH_COMMAND":
|
|
1240
|
+
"GIT_SSH_COMMAND": (
|
|
1241
|
+
f'ssh -i "{key_path_safe}"'
|
|
1242
|
+
f" -o StrictHostKeyChecking=accept-new"
|
|
1243
|
+
f" -o UserKnownHostsFile=/dev/null"
|
|
1244
|
+
f" -o IdentitiesOnly=yes"
|
|
1245
|
+
),
|
|
999
1246
|
}
|
|
1000
1247
|
except Exception:
|
|
1001
1248
|
try:
|
|
@@ -1031,10 +1278,12 @@ class WorkspaceManager:
|
|
|
1031
1278
|
# Clean up temp SSH key file if created
|
|
1032
1279
|
if env and "GIT_SSH_COMMAND" in env:
|
|
1033
1280
|
import re as _re
|
|
1034
|
-
m = _re.search(r
|
|
1281
|
+
m = _re.search(r'-i\s+"?([^"\s]+)"?', env["GIT_SSH_COMMAND"])
|
|
1035
1282
|
if m:
|
|
1283
|
+
key_file = m.group(1)
|
|
1284
|
+
# Resolve forward-slash path back to native for unlink
|
|
1036
1285
|
try:
|
|
1037
|
-
os.unlink(
|
|
1286
|
+
os.unlink(key_file)
|
|
1038
1287
|
except OSError:
|
|
1039
1288
|
pass
|
|
1040
1289
|
if proc.returncode != 0:
|
|
@@ -1054,15 +1303,11 @@ class ProcessManager:
|
|
|
1054
1303
|
"usage limit",
|
|
1055
1304
|
"rate limit",
|
|
1056
1305
|
"rate_limit",
|
|
1057
|
-
"429",
|
|
1058
1306
|
"quota exceeded",
|
|
1059
1307
|
"too many requests",
|
|
1060
1308
|
"overloaded",
|
|
1061
|
-
"capacity",
|
|
1062
|
-
"try again",
|
|
1063
|
-
"credit",
|
|
1064
1309
|
"insufficient_quota",
|
|
1065
|
-
"billing",
|
|
1310
|
+
"billing hard limit",
|
|
1066
1311
|
]
|
|
1067
1312
|
|
|
1068
1313
|
# Patterns indicating the agent's API is unreachable/misconfigured —
|
|
@@ -1074,9 +1319,11 @@ class ProcessManager:
|
|
|
1074
1319
|
"connection refused",
|
|
1075
1320
|
"connection reset",
|
|
1076
1321
|
"connection timed out",
|
|
1322
|
+
"connection error",
|
|
1077
1323
|
"name or service not known",
|
|
1078
1324
|
"no such host",
|
|
1079
1325
|
"network is unreachable",
|
|
1326
|
+
"api error",
|
|
1080
1327
|
]
|
|
1081
1328
|
|
|
1082
1329
|
def __init__(self):
|
|
@@ -1127,8 +1374,12 @@ class ProcessManager:
|
|
|
1127
1374
|
elif isinstance(err, str):
|
|
1128
1375
|
error_messages.append(err)
|
|
1129
1376
|
elif ev_type == "result":
|
|
1130
|
-
|
|
1131
|
-
|
|
1377
|
+
if data.get("is_error"):
|
|
1378
|
+
err_text = str(data.get("result", "") or data.get("error", "") or "result marked as error")
|
|
1379
|
+
error_messages.append(err_text)
|
|
1380
|
+
else:
|
|
1381
|
+
has_result = True
|
|
1382
|
+
has_meaningful_content = True
|
|
1132
1383
|
elif ev_type == "error":
|
|
1133
1384
|
msg = data.get("message", "")
|
|
1134
1385
|
if msg:
|
|
@@ -1170,13 +1421,25 @@ class ProcessManager:
|
|
|
1170
1421
|
|
|
1171
1422
|
Returns True for rate/quota limits AND API unavailability errors,
|
|
1172
1423
|
since a different agent (using a different API backend) may succeed.
|
|
1424
|
+
|
|
1425
|
+
IMPORTANT: Only checks stderr, error message, and the tail of stdout.
|
|
1426
|
+
The full stdout contains the agent's work output (e.g., analysis text
|
|
1427
|
+
about APIs, retry logic, HTTP status codes) which naturally contains
|
|
1428
|
+
patterns like "429", "try again", "capacity" — these are NOT indicators
|
|
1429
|
+
of the agent CLI itself being rate-limited.
|
|
1173
1430
|
"""
|
|
1174
1431
|
if result.status == "success":
|
|
1175
1432
|
return False
|
|
1176
|
-
|
|
1433
|
+
# Search error channels: stderr (CLI errors) + error message + tail of stdout
|
|
1434
|
+
# (last 3000 chars catches any CLI-level error at the end of output)
|
|
1435
|
+
error_text = (
|
|
1436
|
+
(result.stderr or "")
|
|
1437
|
+
+ "\n" + (result.error or "")
|
|
1438
|
+
+ "\n" + (result.stdout or "")[-3000:]
|
|
1439
|
+
).lower()
|
|
1177
1440
|
return (
|
|
1178
|
-
any(p in
|
|
1179
|
-
or any(p in
|
|
1441
|
+
any(p in error_text for p in ProcessManager.RATE_LIMIT_PATTERNS)
|
|
1442
|
+
or any(p in error_text for p in ProcessManager.AGENT_UNAVAILABLE_PATTERNS)
|
|
1180
1443
|
)
|
|
1181
1444
|
|
|
1182
1445
|
@staticmethod
|
|
@@ -1193,8 +1456,16 @@ class ProcessManager:
|
|
|
1193
1456
|
if result.status != "success":
|
|
1194
1457
|
return None
|
|
1195
1458
|
|
|
1196
|
-
|
|
1197
|
-
|
|
1459
|
+
# For rate/unavailability pattern detection, only check error channels
|
|
1460
|
+
# (stderr, error field) plus the TAIL of stdout. The full stdout contains
|
|
1461
|
+
# the agent's work output (analysis text, generated docs) which naturally
|
|
1462
|
+
# mentions terms like "rate limit", "429", "capacity", "credit" etc.
|
|
1463
|
+
error_channels = (
|
|
1464
|
+
(result.stderr or "")
|
|
1465
|
+
+ "\n" + (result.error or "")
|
|
1466
|
+
+ "\n" + (result.stdout or "")[-3000:]
|
|
1467
|
+
)
|
|
1468
|
+
pattern_failure = ProcessManager._has_failure_pattern(error_channels)
|
|
1198
1469
|
if pattern_failure:
|
|
1199
1470
|
return pattern_failure
|
|
1200
1471
|
|
|
@@ -1308,18 +1579,23 @@ class ProcessManager:
|
|
|
1308
1579
|
return normalized
|
|
1309
1580
|
|
|
1310
1581
|
def _required_deliverable_paths(self, task: TaskInfo) -> set[str]:
|
|
1311
|
-
|
|
1582
|
+
# For analysis nodes, deliverables live in analysis_output_dir (docs/requirements/...)
|
|
1583
|
+
# For other nodes, use output_dir (docs/implements/...)
|
|
1584
|
+
if task.node_type == "analysis":
|
|
1585
|
+
output_dir = str(
|
|
1586
|
+
(task.input_data or {}).get("analysis_output_dir", "")
|
|
1587
|
+
or (task.input_data or {}).get("output_dir", "")
|
|
1588
|
+
or ""
|
|
1589
|
+
)
|
|
1590
|
+
else:
|
|
1591
|
+
output_dir = str((task.input_data or {}).get("output_dir", "") or "")
|
|
1312
1592
|
output_dir = output_dir.replace("\\", "/").lstrip("./").rstrip("/")
|
|
1313
1593
|
if not output_dir:
|
|
1314
1594
|
return set()
|
|
1315
1595
|
|
|
1316
1596
|
if task.node_type == "analysis":
|
|
1317
1597
|
req_type = (task.input_data or {}).get("requirement_type", "feature")
|
|
1318
|
-
|
|
1319
|
-
from app.services.type_workflow_profiles import get_profile
|
|
1320
|
-
required_files = list(get_profile(req_type).analysis_outputs)
|
|
1321
|
-
except Exception:
|
|
1322
|
-
required_files = ["PRD.md", "SDD.md", "TASKS.md", "analysis.json", "test-intent.json"]
|
|
1598
|
+
required_files = _get_analysis_outputs_for_type(req_type)
|
|
1323
1599
|
elif task.node_type == "design":
|
|
1324
1600
|
required_files = ["design.md"]
|
|
1325
1601
|
else:
|
|
@@ -1330,7 +1606,8 @@ class ProcessManager:
|
|
|
1330
1606
|
def _has_required_deliverable_updates(self, task: TaskInfo, *path_lists: list[str] | None) -> bool:
|
|
1331
1607
|
required_paths = self._required_deliverable_paths(task)
|
|
1332
1608
|
if not required_paths:
|
|
1333
|
-
|
|
1609
|
+
# Cannot determine required deliverables — skip check (don't fail)
|
|
1610
|
+
return True
|
|
1334
1611
|
|
|
1335
1612
|
changed_paths: set[str] = set()
|
|
1336
1613
|
for paths in path_lists:
|
|
@@ -1422,10 +1699,19 @@ class ProcessManager:
|
|
|
1422
1699
|
while True:
|
|
1423
1700
|
try:
|
|
1424
1701
|
line_bytes = await proc.stdout.readline()
|
|
1425
|
-
except ValueError:
|
|
1426
|
-
# Line exceeded stream buffer limit
|
|
1427
|
-
#
|
|
1428
|
-
|
|
1702
|
+
except (ValueError, asyncio.LimitOverrunError, Exception) as exc:
|
|
1703
|
+
# Line exceeded stream buffer limit (LimitOverrunError
|
|
1704
|
+
# converted to ValueError by readline(), but catch broadly
|
|
1705
|
+
# to handle edge cases in different Python versions).
|
|
1706
|
+
# Fall back to reading remaining data in bulk.
|
|
1707
|
+
logger.warning(
|
|
1708
|
+
"Stream read error for task %s (%s: %s), draining remaining output",
|
|
1709
|
+
task_id, type(exc).__name__, exc,
|
|
1710
|
+
)
|
|
1711
|
+
try:
|
|
1712
|
+
remaining = await proc.stdout.read()
|
|
1713
|
+
except Exception:
|
|
1714
|
+
remaining = b""
|
|
1429
1715
|
if remaining:
|
|
1430
1716
|
for chunk_line in remaining.decode(errors="replace").split("\n"):
|
|
1431
1717
|
if chunk_line:
|
|
@@ -1481,7 +1767,7 @@ class ProcessManager:
|
|
|
1481
1767
|
cmd = [
|
|
1482
1768
|
agent.command,
|
|
1483
1769
|
"-p",
|
|
1484
|
-
"--output-format", "json",
|
|
1770
|
+
"--output-format", "stream-json",
|
|
1485
1771
|
"--verbose",
|
|
1486
1772
|
"--dangerously-skip-permissions",
|
|
1487
1773
|
]
|
|
@@ -1499,7 +1785,7 @@ class ProcessManager:
|
|
|
1499
1785
|
stdin=asyncio.subprocess.PIPE,
|
|
1500
1786
|
cwd=str(cwd),
|
|
1501
1787
|
env=env,
|
|
1502
|
-
limit=
|
|
1788
|
+
limit=100 * 1024 * 1024, # 100MB line buffer for large JSON output from long sessions
|
|
1503
1789
|
)
|
|
1504
1790
|
self.active_processes[task_id] = proc
|
|
1505
1791
|
stdout, stderr, returncode = await self._stream_process(
|
|
@@ -1533,6 +1819,17 @@ class ProcessManager:
|
|
|
1533
1819
|
status="failed", exit_code=-1, stdout="", stderr="",
|
|
1534
1820
|
error=f"Timed out after {timeout}s",
|
|
1535
1821
|
)
|
|
1822
|
+
except Exception as exc:
|
|
1823
|
+
logger.exception("Claude stream error for task %s", task_id)
|
|
1824
|
+
if task_id in self.active_processes:
|
|
1825
|
+
try:
|
|
1826
|
+
self.active_processes[task_id].kill()
|
|
1827
|
+
except Exception:
|
|
1828
|
+
pass
|
|
1829
|
+
return TaskResult(
|
|
1830
|
+
status="failed", exit_code=-1, stdout="", stderr="",
|
|
1831
|
+
error=f"Stream processing error: {exc}",
|
|
1832
|
+
)
|
|
1536
1833
|
finally:
|
|
1537
1834
|
self.active_processes.pop(task_id, None)
|
|
1538
1835
|
|
|
@@ -1600,7 +1897,7 @@ class ProcessManager:
|
|
|
1600
1897
|
stderr=asyncio.subprocess.PIPE,
|
|
1601
1898
|
stdin=asyncio.subprocess.PIPE if stdin_input else None,
|
|
1602
1899
|
cwd=str(cwd),
|
|
1603
|
-
limit=
|
|
1900
|
+
limit=100 * 1024 * 1024, # 100MB line buffer for large agent output
|
|
1604
1901
|
)
|
|
1605
1902
|
self.active_processes[task_id] = proc
|
|
1606
1903
|
stdin_bytes = stdin_input.encode() if stdin_input else None
|
|
@@ -1622,6 +1919,17 @@ class ProcessManager:
|
|
|
1622
1919
|
status="failed", exit_code=-1, stdout="", stderr="",
|
|
1623
1920
|
error=f"Timed out after {timeout}s",
|
|
1624
1921
|
)
|
|
1922
|
+
except Exception as exc:
|
|
1923
|
+
logger.exception("CLI stream error for task %s", task_id)
|
|
1924
|
+
if task_id in self.active_processes:
|
|
1925
|
+
try:
|
|
1926
|
+
self.active_processes[task_id].kill()
|
|
1927
|
+
except Exception:
|
|
1928
|
+
pass
|
|
1929
|
+
return TaskResult(
|
|
1930
|
+
status="failed", exit_code=-1, stdout="", stderr="",
|
|
1931
|
+
error=f"Stream processing error: {exc}",
|
|
1932
|
+
)
|
|
1625
1933
|
finally:
|
|
1626
1934
|
self.active_processes.pop(task_id, None)
|
|
1627
1935
|
|
|
@@ -1663,6 +1971,9 @@ class ProcessManager:
|
|
|
1663
1971
|
data = json.loads(stdout.strip())
|
|
1664
1972
|
if isinstance(data, dict):
|
|
1665
1973
|
parsed.append(data)
|
|
1974
|
+
elif isinstance(data, list):
|
|
1975
|
+
# Handle JSON array (from --output-format json)
|
|
1976
|
+
parsed.extend(d for d in data if isinstance(d, dict))
|
|
1666
1977
|
except (json.JSONDecodeError, ValueError):
|
|
1667
1978
|
pass
|
|
1668
1979
|
|
|
@@ -2139,6 +2450,8 @@ class HeartbeatService:
|
|
|
2139
2450
|
"available_agents": self._agents,
|
|
2140
2451
|
"system_metrics": self._collect_system_metrics(),
|
|
2141
2452
|
"os_info": get_os_info(),
|
|
2453
|
+
"daemon_version": DAEMON_VERSION,
|
|
2454
|
+
"client_type": _CLIENT_TYPE,
|
|
2142
2455
|
},
|
|
2143
2456
|
timeout=10,
|
|
2144
2457
|
)
|
|
@@ -2167,6 +2480,76 @@ class HeartbeatService:
|
|
|
2167
2480
|
return {}
|
|
2168
2481
|
|
|
2169
2482
|
|
|
2483
|
+
# ── Log Uploader ──
|
|
2484
|
+
|
|
2485
|
+
|
|
2486
|
+
class LogUploader:
    """Periodically uploads daemon log tail to the server for remote viewing.

    A background asyncio task waits ``INITIAL_DELAY`` seconds, then posts
    the tail of the file at ``DAEMON_LOG_PATH`` to
    ``<server_url>/api/v1/runtimes/<runtime_id>/logs`` every
    ``LOG_UPLOAD_INTERVAL`` seconds. Upload failures are logged as warnings
    and never stop the loop.
    """

    LOG_UPLOAD_INTERVAL = 300  # Upload every 5 minutes
    LOG_TAIL_LINES = 500  # Last N lines to upload
    LOG_TAIL_MAX_BYTES = 100 * 1024  # Read at most this much from the end of the log
    INITIAL_DELAY = 30  # Seconds to wait before the first upload

    def __init__(
        self,
        client: "httpx.AsyncClient",
        server_url: str,
        runtime_id: str,
    ):
        self.client = client
        self.server_url = server_url.rstrip("/")
        self.runtime_id = runtime_id
        self._task: asyncio.Task | None = None

    async def start(self):
        """Start the background upload task; a no-op while one is running."""
        # Guard against a second start() orphaning the first task, which
        # stop() could then no longer cancel.
        if self._task is None or self._task.done():
            self._task = asyncio.create_task(self._loop())

    async def stop(self):
        """Cancel the background task, if any, and wait for it to finish."""
        task, self._task = self._task, None
        if task:
            task.cancel()
            try:
                await task
            except asyncio.CancelledError:
                pass

    async def _loop(self):
        # Initial upload after a delay (let daemon stabilize first)
        await asyncio.sleep(self.INITIAL_DELAY)
        while True:
            try:
                await self._upload()
            except asyncio.CancelledError:
                raise
            except Exception as e:
                logger.warning("Log upload error: %s", e)
            await asyncio.sleep(self.LOG_UPLOAD_INTERVAL)

    @staticmethod
    def _read_tail(path, max_bytes: int, max_lines: int) -> str:
        """Return the last *max_lines* lines found in the final *max_bytes* of *path*.

        When the byte window starts in the middle of the file, the leading
        partial line is discarded so the result never begins with a line
        fragment or a split multi-byte character.
        """
        with open(path, "rb") as f:
            f.seek(0, os.SEEK_END)
            file_size = f.tell()
            start = max(file_size - max_bytes, 0)
            # Read one extra byte before the window: if it is a newline the
            # first line in the window is complete and must be kept.
            f.seek(start - 1 if start > 0 else 0)
            raw = f.read()
        if start > 0:
            newline_at = raw.find(b"\n")
            raw = raw[newline_at + 1:] if newline_at != -1 else raw[1:]
        text = raw.decode("utf-8", errors="replace")
        return "\n".join(text.split("\n")[-max_lines:])

    async def _upload(self):
        """Read daemon log tail and upload to server."""
        if not self.runtime_id:
            # Without a runtime id there is no valid endpoint to post to.
            return
        try:
            if not DAEMON_LOG_PATH.exists():
                return
            tail = self._read_tail(
                DAEMON_LOG_PATH, self.LOG_TAIL_MAX_BYTES, self.LOG_TAIL_LINES,
            )
            await self.client.post(
                f"{self.server_url}/api/v1/runtimes/{self.runtime_id}/logs",
                json={"log_tail": tail, "log_lines": self.LOG_TAIL_LINES},
                timeout=15,
            )
        except Exception as e:
            logger.warning("Failed to upload daemon log: %s", e)
|
|
2552
|
+
|
|
2170
2553
|
# ── Task Poller ──
|
|
2171
2554
|
|
|
2172
2555
|
|
|
@@ -2253,6 +2636,7 @@ class ServerConnection:
|
|
|
2253
2636
|
self.heartbeat: HeartbeatService | None = None
|
|
2254
2637
|
self.poller: TaskPoller | None = None
|
|
2255
2638
|
self.reporter: ProgressReporter | None = None
|
|
2639
|
+
self.log_uploader: LogUploader | None = None
|
|
2256
2640
|
self._auth_failures = 0 # Consecutive auth failure count
|
|
2257
2641
|
self._max_auth_failures = 3 # Trigger re-registration after this many
|
|
2258
2642
|
# Short label for logging
|
|
@@ -2295,6 +2679,8 @@ class ServerConnection:
|
|
|
2295
2679
|
self.poller.runtime_id = self.runtime_id
|
|
2296
2680
|
if self.reporter and self.runtime_id:
|
|
2297
2681
|
self.reporter.runtime_id = self.runtime_id
|
|
2682
|
+
if self.log_uploader and self.runtime_id:
|
|
2683
|
+
self.log_uploader.runtime_id = self.runtime_id
|
|
2298
2684
|
self._auth_failures = 0
|
|
2299
2685
|
logger.info("[%s] Re-registered successfully after token refresh", self.label)
|
|
2300
2686
|
except Exception as e:
|
|
@@ -2321,6 +2707,8 @@ class ServerConnection:
|
|
|
2321
2707
|
"hardware_id": self.hardware_id,
|
|
2322
2708
|
"device_name": platform.node(),
|
|
2323
2709
|
"os_info": get_os_info(),
|
|
2710
|
+
"daemon_version": DAEMON_VERSION,
|
|
2711
|
+
"client_type": _CLIENT_TYPE,
|
|
2324
2712
|
"available_agents": agent_dicts,
|
|
2325
2713
|
"max_concurrent_tasks": max_concurrent,
|
|
2326
2714
|
"capabilities": {
|
|
@@ -2391,15 +2779,22 @@ class ServerConnection:
|
|
|
2391
2779
|
self.reporter = ProgressReporter(
|
|
2392
2780
|
self.client, self.server_url, self.runtime_id,
|
|
2393
2781
|
)
|
|
2782
|
+
self.log_uploader = LogUploader(
|
|
2783
|
+
self.client, self.server_url, self.runtime_id,
|
|
2784
|
+
)
|
|
2394
2785
|
|
|
2395
2786
|
async def start_heartbeat(self):
|
|
2396
2787
|
if self.heartbeat:
|
|
2397
2788
|
await self.heartbeat.start()
|
|
2789
|
+
if self.log_uploader:
|
|
2790
|
+
await self.log_uploader.start()
|
|
2398
2791
|
|
|
2399
2792
|
async def stop(self):
|
|
2400
2793
|
"""Stop heartbeat and unregister."""
|
|
2401
2794
|
if self.heartbeat:
|
|
2402
2795
|
await self.heartbeat.stop()
|
|
2796
|
+
if self.log_uploader:
|
|
2797
|
+
await self.log_uploader.stop()
|
|
2403
2798
|
if self.runtime_id:
|
|
2404
2799
|
try:
|
|
2405
2800
|
# Use deregister endpoint (no admin required) instead of DELETE
|
|
@@ -2499,40 +2894,118 @@ class RuntimeDaemon:
|
|
|
2499
2894
|
CLI starts, etc.
|
|
2500
2895
|
"""
|
|
2501
2896
|
lock_path = Path.home() / ".forgexa" / "daemon" / "daemon.lock"
|
|
2897
|
+
pid_path = Path.home() / ".forgexa" / "daemon" / "daemon.pid"
|
|
2502
2898
|
lock_path.parent.mkdir(parents=True, exist_ok=True)
|
|
2503
2899
|
|
|
2504
2900
|
if sys.platform == "win32":
|
|
2505
|
-
# Windows: use msvcrt file locking
|
|
2901
|
+
# Windows: use msvcrt file locking.
|
|
2902
|
+
#
|
|
2903
|
+
# IMPORTANT: msvcrt.locking() creates mandatory byte-range locks
|
|
2904
|
+
# that prevent OTHER processes from reading the locked bytes.
|
|
2905
|
+
# Therefore we store the PID in a separate daemon.pid file that
|
|
2906
|
+
# is never locked, so we can always read the old daemon's PID.
|
|
2506
2907
|
import msvcrt
|
|
2507
2908
|
|
|
2909
|
+
# ── Step 1: read old PID BEFORE touching the lock file ──
|
|
2910
|
+
old_pid = None
|
|
2911
|
+
try:
|
|
2912
|
+
if pid_path.exists():
|
|
2913
|
+
old_pid = int(pid_path.read_text().strip())
|
|
2914
|
+
except (ValueError, OSError):
|
|
2915
|
+
pass
|
|
2916
|
+
|
|
2917
|
+
# ── Step 2: try to acquire the lock ──
|
|
2508
2918
|
self._lock_file = open(lock_path, "w")
|
|
2509
2919
|
try:
|
|
2510
2920
|
msvcrt.locking(self._lock_file.fileno(), msvcrt.LK_NBLCK, 1)
|
|
2511
2921
|
except (IOError, OSError):
|
|
2512
|
-
# Lock held
|
|
2513
|
-
|
|
2514
|
-
|
|
2515
|
-
|
|
2516
|
-
|
|
2517
|
-
_sp.run(["taskkill", "/PID", str(old_pid), "/F"],
|
|
2922
|
+
# Lock held by another daemon — kill it
|
|
2923
|
+
import subprocess as _sp
|
|
2924
|
+
|
|
2925
|
+
if old_pid and old_pid != os.getpid():
|
|
2926
|
+
logger.warning("Another daemon (PID %d) holds the lock. Killing...", old_pid)
|
|
2927
|
+
_sp.run(["taskkill", "/PID", str(old_pid), "/F", "/T"],
|
|
2518
2928
|
capture_output=True)
|
|
2519
|
-
|
|
2520
|
-
|
|
2521
|
-
|
|
2929
|
+
else:
|
|
2930
|
+
# No daemon.pid or PID matches us — find by process enumeration.
|
|
2931
|
+
# Uses PowerShell Get-CimInstance (reliable on all modern Windows).
|
|
2932
|
+
# wmic is deprecated since Windows 10 21H2 / Windows 11.
|
|
2933
|
+
logger.warning("No daemon PID file; killing daemon by process enumeration...")
|
|
2934
|
+
try:
|
|
2935
|
+
ps_script = (
|
|
2936
|
+
"Get-CimInstance Win32_Process | "
|
|
2937
|
+
"Where-Object { "
|
|
2938
|
+
"($_.CommandLine -like '*daemon.py*' -or $_.Name -eq 'forgexa-daemon.exe') "
|
|
2939
|
+
"-and $_.ProcessId -ne " + str(os.getpid()) + " } | "
|
|
2940
|
+
"Select-Object -ExpandProperty ProcessId"
|
|
2941
|
+
)
|
|
2942
|
+
result = _sp.run(
|
|
2943
|
+
["powershell", "-NoProfile", "-NonInteractive", "-Command", ps_script],
|
|
2944
|
+
capture_output=True, text=True, timeout=15)
|
|
2945
|
+
for line in result.stdout.strip().splitlines():
|
|
2946
|
+
line = line.strip()
|
|
2947
|
+
if line.isdigit():
|
|
2948
|
+
pid = int(line)
|
|
2949
|
+
if pid != os.getpid():
|
|
2950
|
+
logger.info("Killing orphan daemon process (PID %d)", pid)
|
|
2951
|
+
_sp.run(["taskkill", "/PID", str(pid), "/F", "/T"],
|
|
2952
|
+
capture_output=True)
|
|
2953
|
+
except Exception as e:
|
|
2954
|
+
logger.debug("Process enumeration fallback failed: %s", e)
|
|
2955
|
+
|
|
2956
|
+
# Wait for process to fully terminate and release file handles.
|
|
2957
|
+
# Verify death before proceeding (Windows needs time to release handles).
|
|
2958
|
+
time.sleep(2)
|
|
2959
|
+
if old_pid and old_pid != os.getpid():
|
|
2960
|
+
for _ in range(6): # Up to 3 more seconds
|
|
2961
|
+
try:
|
|
2962
|
+
result = _sp.run(
|
|
2963
|
+
["tasklist", "/FI", f"PID eq {old_pid}", "/NH", "/FO", "CSV"],
|
|
2964
|
+
capture_output=True, text=True, timeout=5)
|
|
2965
|
+
if str(old_pid) not in result.stdout:
|
|
2966
|
+
break
|
|
2967
|
+
except Exception:
|
|
2968
|
+
break
|
|
2969
|
+
time.sleep(0.5)
|
|
2522
2970
|
|
|
2523
|
-
#
|
|
2971
|
+
# Close our handle and remove stale lock file
|
|
2524
2972
|
self._lock_file.close()
|
|
2525
|
-
self._lock_file = open(lock_path, "w")
|
|
2526
2973
|
try:
|
|
2527
|
-
|
|
2528
|
-
except
|
|
2974
|
+
lock_path.unlink(missing_ok=True)
|
|
2975
|
+
except OSError:
|
|
2976
|
+
pass
|
|
2977
|
+
|
|
2978
|
+
# Retry with backoff — up to 5 attempts (total ~15s)
|
|
2979
|
+
acquired = False
|
|
2980
|
+
for attempt in range(5):
|
|
2981
|
+
try:
|
|
2982
|
+
self._lock_file = open(lock_path, "w")
|
|
2983
|
+
msvcrt.locking(self._lock_file.fileno(), msvcrt.LK_NBLCK, 1)
|
|
2984
|
+
acquired = True
|
|
2985
|
+
break
|
|
2986
|
+
except (IOError, OSError):
|
|
2987
|
+
self._lock_file.close()
|
|
2988
|
+
wait = (attempt + 1) * 1 # 1s, 2s, 3s, 4s, 5s
|
|
2989
|
+
logger.warning("Lock retry %d/5 failed, waiting %ds...", attempt + 1, wait)
|
|
2990
|
+
time.sleep(wait)
|
|
2991
|
+
|
|
2992
|
+
if not acquired:
|
|
2529
2993
|
logger.error("Cannot acquire daemon lock — another instance may still be running")
|
|
2530
2994
|
raise SystemExit(1)
|
|
2531
2995
|
|
|
2996
|
+
# Write PID to lock file (for reference, though unreadable while locked)
|
|
2532
2997
|
self._lock_file.seek(0)
|
|
2533
2998
|
self._lock_file.truncate()
|
|
2534
2999
|
self._lock_file.write(str(os.getpid()))
|
|
2535
3000
|
self._lock_file.flush()
|
|
3001
|
+
|
|
3002
|
+
# Write PID to separate unlocked file — always readable by other
|
|
3003
|
+
# processes (Rust manager, NSIS installer, next daemon instance).
|
|
3004
|
+
try:
|
|
3005
|
+
pid_path.write_text(str(os.getpid()))
|
|
3006
|
+
except OSError as e:
|
|
3007
|
+
logger.warning("Could not write daemon.pid: %s", e)
|
|
3008
|
+
|
|
2536
3009
|
logger.info("Acquired exclusive daemon lock (pid=%d)", os.getpid())
|
|
2537
3010
|
return
|
|
2538
3011
|
|
|
@@ -2582,6 +3055,12 @@ class RuntimeDaemon:
|
|
|
2582
3055
|
self._lock_file.flush()
|
|
2583
3056
|
logger.info("Acquired exclusive daemon lock (pid=%d)", os.getpid())
|
|
2584
3057
|
|
|
3058
|
+
# Write PID to separate file for consistency with Windows path
|
|
3059
|
+
try:
|
|
3060
|
+
pid_path.write_text(str(os.getpid()))
|
|
3061
|
+
except OSError:
|
|
3062
|
+
pass
|
|
3063
|
+
|
|
2585
3064
|
# Also clean up CLI daemon PID file if it points to a dead process
|
|
2586
3065
|
cli_pid_file = Path.home() / ".forgexa-daemon.pid"
|
|
2587
3066
|
if cli_pid_file.exists():
|
|
@@ -2623,20 +3102,37 @@ class RuntimeDaemon:
|
|
|
2623
3102
|
", ".join(a.agent_id for a in self.agents))
|
|
2624
3103
|
|
|
2625
3104
|
# 2. Register with all servers
|
|
2626
|
-
|
|
2627
|
-
|
|
2628
|
-
|
|
2629
|
-
|
|
2630
|
-
|
|
2631
|
-
|
|
2632
|
-
self.
|
|
2633
|
-
|
|
2634
|
-
|
|
2635
|
-
|
|
2636
|
-
|
|
3105
|
+
# 2. Register with all servers (with retry on transient failures)
|
|
3106
|
+
max_registration_attempts = 5
|
|
3107
|
+
for attempt in range(max_registration_attempts):
|
|
3108
|
+
for url in self.server_urls:
|
|
3109
|
+
if any(c.server_url == url.rstrip("/") for c in self.connections):
|
|
3110
|
+
continue # Already connected to this server
|
|
3111
|
+
conn = ServerConnection(url, self.api_token, self.daemon_id, self.hardware_id)
|
|
3112
|
+
try:
|
|
3113
|
+
await conn.register(self.agents, self.max_concurrent)
|
|
3114
|
+
conn.start_services(self.heartbeat_interval, self.poll_interval, self.agents)
|
|
3115
|
+
await conn.start_heartbeat()
|
|
3116
|
+
self.connections.append(conn)
|
|
3117
|
+
logger.info("[%s] Connected and ready", conn.label)
|
|
3118
|
+
except Exception as e:
|
|
3119
|
+
logger.error("[%s] Failed to connect: %s — skipping this server", conn.label, e)
|
|
3120
|
+
await conn.client.aclose()
|
|
3121
|
+
|
|
3122
|
+
if self.connections:
|
|
3123
|
+
break # At least one server connected
|
|
3124
|
+
|
|
3125
|
+
if attempt < max_registration_attempts - 1:
|
|
3126
|
+
wait = (attempt + 1) * 5 # 5s, 10s, 15s, 20s
|
|
3127
|
+
logger.warning(
|
|
3128
|
+
"No servers reachable (attempt %d/%d). Retrying in %ds...",
|
|
3129
|
+
attempt + 1, max_registration_attempts, wait,
|
|
3130
|
+
)
|
|
3131
|
+
await asyncio.sleep(wait)
|
|
2637
3132
|
|
|
2638
3133
|
if not self.connections:
|
|
2639
|
-
logger.error("Failed to connect to any server. Exiting."
|
|
3134
|
+
logger.error("Failed to connect to any server after %d attempts. Exiting.",
|
|
3135
|
+
max_registration_attempts)
|
|
2640
3136
|
raise SystemExit(1)
|
|
2641
3137
|
|
|
2642
3138
|
logger.info("Daemon ready. Connected to %d server(s). Polling for tasks...",
|
|
@@ -2730,15 +3226,15 @@ class RuntimeDaemon:
|
|
|
2730
3226
|
_line_buffer.extend(lines)
|
|
2731
3227
|
|
|
2732
3228
|
async def _progress_ticker():
|
|
2733
|
-
"""Flush buffered output lines + update progress % every
|
|
3229
|
+
"""Flush buffered output lines + update progress % every 5 s."""
|
|
2734
3230
|
import math as _math
|
|
2735
3231
|
tick = 0
|
|
2736
3232
|
while not progress_stop.is_set():
|
|
2737
|
-
await asyncio.sleep(
|
|
3233
|
+
await asyncio.sleep(5)
|
|
2738
3234
|
if progress_stop.is_set():
|
|
2739
3235
|
break
|
|
2740
3236
|
tick += 1
|
|
2741
|
-
pct = min(int(10 + 80 * (1 - 1 / (1 + tick /
|
|
3237
|
+
pct = min(int(10 + 80 * (1 - 1 / (1 + tick / 16))), 90)
|
|
2742
3238
|
pid = self.process_manager.active_processes.get(task.task_id)
|
|
2743
3239
|
step = "running_agent"
|
|
2744
3240
|
if pid:
|
|
@@ -2775,7 +3271,26 @@ class RuntimeDaemon:
|
|
|
2775
3271
|
tried_agents.add(agent.agent_id)
|
|
2776
3272
|
|
|
2777
3273
|
# ── Agent fallback: if agent hit rate limit or API is unavailable, try next agent ──
|
|
3274
|
+
# Guard: if the agent already produced file changes in the workspace, it DID
|
|
3275
|
+
# meaningful work — don't trigger fallback even if it crashed after completing.
|
|
3276
|
+
# Let the recovery logic (step 4.1) handle non-zero exit with committed work.
|
|
3277
|
+
_skip_fallback = False
|
|
2778
3278
|
if self.process_manager.is_rate_limited(result):
|
|
3279
|
+
_pre_fallback_git = await self.process_manager._collect_git_info(workspace_path)
|
|
3280
|
+
_pre_fallback_committed = await self.process_manager._collect_git_info_vs_parent(workspace_path)
|
|
3281
|
+
has_workspace_changes = (
|
|
3282
|
+
bool(_pre_fallback_git.get("files_changed"))
|
|
3283
|
+
or bool(_pre_fallback_committed.get("files_changed"))
|
|
3284
|
+
)
|
|
3285
|
+
if has_workspace_changes:
|
|
3286
|
+
logger.info(
|
|
3287
|
+
"Agent '%s' exited non-zero for task %s but workspace has changes — "
|
|
3288
|
+
"skipping fallback, proceeding to recovery",
|
|
3289
|
+
agent.agent_id, task.task_id,
|
|
3290
|
+
)
|
|
3291
|
+
_skip_fallback = True
|
|
3292
|
+
|
|
3293
|
+
if self.process_manager.is_rate_limited(result) and not _skip_fallback:
|
|
2779
3294
|
logger.warning(
|
|
2780
3295
|
"Agent '%s' unavailable/rate-limited for task %s, attempting fallback",
|
|
2781
3296
|
agent.agent_id, task.task_id,
|
|
@@ -2805,11 +3320,11 @@ class RuntimeDaemon:
|
|
|
2805
3320
|
async def _progress_ticker2():
|
|
2806
3321
|
tick = 0
|
|
2807
3322
|
while not progress_stop2.is_set():
|
|
2808
|
-
await asyncio.sleep(
|
|
3323
|
+
await asyncio.sleep(5)
|
|
2809
3324
|
if progress_stop2.is_set():
|
|
2810
3325
|
break
|
|
2811
3326
|
tick += 1
|
|
2812
|
-
pct = min(int(10 + 80 * (1 - 1 / (1 + tick /
|
|
3327
|
+
pct = min(int(10 + 80 * (1 - 1 / (1 + tick / 16))), 90)
|
|
2813
3328
|
pid = self.process_manager.active_processes.get(task.task_id)
|
|
2814
3329
|
step = f"running_agent:{agent.agent_id}"
|
|
2815
3330
|
if pid:
|
|
@@ -2922,13 +3437,33 @@ class RuntimeDaemon:
|
|
|
2922
3437
|
# Existing files from a prior iteration are not sufficient evidence.
|
|
2923
3438
|
if result.status == "success" and task.node_type in ("analysis", "design"):
|
|
2924
3439
|
committed_git = await self.process_manager._collect_git_info_vs_parent(workspace_path)
|
|
2925
|
-
|
|
3440
|
+
git_check_passed = self.process_manager._has_required_deliverable_updates(
|
|
2926
3441
|
task,
|
|
2927
3442
|
pre_commit_git.get("files_changed"),
|
|
2928
3443
|
committed_git.get("files_changed"),
|
|
2929
3444
|
result.files_changed,
|
|
2930
3445
|
(result.git or {}).get("files_changed"),
|
|
2931
|
-
)
|
|
3446
|
+
)
|
|
3447
|
+
# Fallback: if git-based check fails (e.g., agent auto-committed and
|
|
3448
|
+
# merge-base detection failed), verify files physically exist on disk.
|
|
3449
|
+
# This prevents false failures when git state is unusual but files
|
|
3450
|
+
# are actually present.
|
|
3451
|
+
if not git_check_passed:
|
|
3452
|
+
required_paths = self.process_manager._required_deliverable_paths(task)
|
|
3453
|
+
if required_paths:
|
|
3454
|
+
files_exist = all(
|
|
3455
|
+
(workspace_path / p).exists() and (workspace_path / p).stat().st_size > 0
|
|
3456
|
+
for p in required_paths
|
|
3457
|
+
)
|
|
3458
|
+
if files_exist:
|
|
3459
|
+
logger.info(
|
|
3460
|
+
"Task %s (%s): git diff did not show deliverables but all %d "
|
|
3461
|
+
"files exist on disk — accepting as success",
|
|
3462
|
+
task.task_id, task.node_type, len(required_paths),
|
|
3463
|
+
)
|
|
3464
|
+
git_check_passed = True
|
|
3465
|
+
|
|
3466
|
+
if not git_check_passed:
|
|
2932
3467
|
logger.warning(
|
|
2933
3468
|
"Task %s (%s) reported success but did not update required deliverables",
|
|
2934
3469
|
task.task_id, task.node_type,
|
|
@@ -2955,6 +3490,16 @@ class RuntimeDaemon:
|
|
|
2955
3490
|
if commit_result:
|
|
2956
3491
|
# Propagate push/commit errors in metrics so they're visible
|
|
2957
3492
|
result.metrics.update(commit_result)
|
|
3493
|
+
# Push failure is a real problem for downstream nodes — mark
|
|
3494
|
+
# as failed so the orchestrator can retry (transient network).
|
|
3495
|
+
if commit_result.get("push_error"):
|
|
3496
|
+
push_err = commit_result["push_error"]
|
|
3497
|
+
logger.error(
|
|
3498
|
+
"Task %s: push failed, marking as failed so retry can attempt push again: %s",
|
|
3499
|
+
task.task_id, push_err,
|
|
3500
|
+
)
|
|
3501
|
+
result.status = "failed"
|
|
3502
|
+
result.error = f"Git push failed: {push_err}"
|
|
2958
3503
|
# Re-collect git info after commit (compare with parent)
|
|
2959
3504
|
post_commit_git = await self.process_manager._collect_git_info_vs_parent(workspace_path)
|
|
2960
3505
|
# Merge: use the pre-commit file list if post-commit is empty
|
|
@@ -3057,15 +3602,13 @@ class RuntimeDaemon:
|
|
|
3057
3602
|
|
|
3058
3603
|
if node_type == "analysis":
|
|
3059
3604
|
# Use type profile to determine required analysis outputs
|
|
3060
|
-
|
|
3061
|
-
from app.services.type_workflow_profiles import get_profile
|
|
3062
|
-
profile = get_profile(req_type)
|
|
3063
|
-
required_files = profile.analysis_outputs
|
|
3064
|
-
except Exception:
|
|
3065
|
-
# Fallback to full set if profile import fails
|
|
3066
|
-
required_files = ["PRD.md", "SDD.md", "TASKS.md", "analysis.json", "test-intent.json"]
|
|
3605
|
+
required_files = _get_analysis_outputs_for_type(req_type)
|
|
3067
3606
|
|
|
3068
|
-
|
|
3607
|
+
# Analysis deliverables live in analysis_output_dir (docs/requirements/...)
|
|
3608
|
+
doc_dir = (
|
|
3609
|
+
(task.input_data or {}).get("analysis_output_dir", "")
|
|
3610
|
+
or (task.input_data or {}).get("output_dir", "")
|
|
3611
|
+
)
|
|
3069
3612
|
if doc_dir:
|
|
3070
3613
|
base = workspace_path / doc_dir
|
|
3071
3614
|
else:
|
|
@@ -3300,20 +3843,25 @@ class RuntimeDaemon:
|
|
|
3300
3843
|
always receives the file contents via the completion report and gate
|
|
3301
3844
|
reviewers can see the analysis documents immediately.
|
|
3302
3845
|
"""
|
|
3303
|
-
|
|
3846
|
+
# Analysis deliverables live in analysis_output_dir (docs/requirements/...)
|
|
3847
|
+
doc_dir = (
|
|
3848
|
+
(task.input_data or {}).get("analysis_output_dir", "")
|
|
3849
|
+
or (task.input_data or {}).get("output_dir", "")
|
|
3850
|
+
)
|
|
3304
3851
|
if not doc_dir:
|
|
3305
3852
|
return
|
|
3306
3853
|
|
|
3307
3854
|
base = workspace_path / doc_dir.lstrip("./")
|
|
3308
|
-
|
|
3309
|
-
|
|
3855
|
+
req_type = (task.input_data or {}).get("requirement_type", "feature")
|
|
3856
|
+
_ANALYSIS_FILES = _get_analysis_outputs_for_type(req_type)
|
|
3857
|
+
existing_artifact_paths = {a.get("path", "").replace("\\", "/") for a in result.artifacts}
|
|
3310
3858
|
|
|
3311
3859
|
for fname in _ANALYSIS_FILES:
|
|
3312
3860
|
fpath = base / fname
|
|
3313
3861
|
if not fpath.exists() or fpath.stat().st_size == 0:
|
|
3314
3862
|
continue
|
|
3315
3863
|
try:
|
|
3316
|
-
rel_path = str(fpath.relative_to(workspace_path))
|
|
3864
|
+
rel_path = str(fpath.relative_to(workspace_path)).replace("\\", "/")
|
|
3317
3865
|
if rel_path in existing_artifact_paths:
|
|
3318
3866
|
continue # already attached
|
|
3319
3867
|
content = fpath.read_text(encoding="utf-8", errors="replace")
|
|
@@ -3343,13 +3891,13 @@ class RuntimeDaemon:
|
|
|
3343
3891
|
return
|
|
3344
3892
|
|
|
3345
3893
|
base = workspace_path / doc_dir.lstrip("./")
|
|
3346
|
-
existing_artifact_paths = {a.get("path", "") for a in result.artifacts}
|
|
3894
|
+
existing_artifact_paths = {a.get("path", "").replace("\\", "/") for a in result.artifacts}
|
|
3347
3895
|
|
|
3348
3896
|
design_path = base / "design.md"
|
|
3349
3897
|
if not design_path.exists() or design_path.stat().st_size == 0:
|
|
3350
3898
|
return
|
|
3351
3899
|
try:
|
|
3352
|
-
rel_path = str(design_path.relative_to(workspace_path))
|
|
3900
|
+
rel_path = str(design_path.relative_to(workspace_path)).replace("\\", "/")
|
|
3353
3901
|
if rel_path in existing_artifact_paths:
|
|
3354
3902
|
return
|
|
3355
3903
|
content = design_path.read_text(encoding="utf-8", errors="replace")
|
|
@@ -3411,6 +3959,8 @@ class RuntimeDaemon:
|
|
|
3411
3959
|
or task.input_data.get("title")
|
|
3412
3960
|
or ""
|
|
3413
3961
|
)
|
|
3962
|
+
if not isinstance(wi_title, str):
|
|
3963
|
+
wi_title = str(wi_title)
|
|
3414
3964
|
req_key = task.requirement_key or task.work_item.get("requirement_key") or ""
|
|
3415
3965
|
if req_key and wi_title:
|
|
3416
3966
|
display_title = f"{req_key}: {wi_title}"
|
|
@@ -3421,11 +3971,15 @@ class RuntimeDaemon:
|
|
|
3421
3971
|
else:
|
|
3422
3972
|
display_title = task.task_id
|
|
3423
3973
|
|
|
3424
|
-
|
|
3425
|
-
|
|
3426
|
-
|
|
3427
|
-
|
|
3428
|
-
|
|
3974
|
+
try:
|
|
3975
|
+
commit_msg = await self._build_auto_commit_message(
|
|
3976
|
+
display_title, task.task_id, task.node_type,
|
|
3977
|
+
task.agent_type, change_summary,
|
|
3978
|
+
workspace_path=workspace_path,
|
|
3979
|
+
)
|
|
3980
|
+
except Exception as msg_err:
|
|
3981
|
+
logger.warning("Failed to build rich commit message: %s — using fallback", msg_err)
|
|
3982
|
+
commit_msg = f"{task.node_type}({task.requirement_key or task.task_id}): {display_title}"
|
|
3429
3983
|
proc = await asyncio.create_subprocess_exec(
|
|
3430
3984
|
"git", "commit", "-m", commit_msg,
|
|
3431
3985
|
cwd=str(workspace_path),
|
|
@@ -3622,7 +4176,22 @@ class RuntimeDaemon:
|
|
|
3622
4176
|
lines: list[str] = []
|
|
3623
4177
|
|
|
3624
4178
|
# Summary — word-wrap at 78 chars
|
|
3625
|
-
|
|
4179
|
+
raw_summary = data.get("summary")
|
|
4180
|
+
if isinstance(raw_summary, dict):
|
|
4181
|
+
# Some agents produce summary as a structured object; extract description
|
|
4182
|
+
summary = (
|
|
4183
|
+
raw_summary.get("description")
|
|
4184
|
+
or raw_summary.get("title")
|
|
4185
|
+
or raw_summary.get("summary")
|
|
4186
|
+
or ""
|
|
4187
|
+
)
|
|
4188
|
+
if not isinstance(summary, str):
|
|
4189
|
+
summary = str(summary) if summary else ""
|
|
4190
|
+
elif isinstance(raw_summary, str):
|
|
4191
|
+
summary = raw_summary
|
|
4192
|
+
else:
|
|
4193
|
+
summary = str(raw_summary) if raw_summary else ""
|
|
4194
|
+
summary = summary.strip()
|
|
3626
4195
|
if summary:
|
|
3627
4196
|
words = summary.split()
|
|
3628
4197
|
current = ""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|