forgexa-cli 1.3.2__tar.gz → 1.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: forgexa-cli
3
- Version: 1.3.2
3
+ Version: 1.3.4
4
4
  Summary: Forgexa CLI — command-line client and AI agent runtime for the Forgexa platform
5
5
  Author-email: Jason Sun <dev.winds@gmail.com>
6
6
  License: MIT
@@ -1,2 +1,2 @@
1
1
  """forgexa-cli — Forgexa command-line client."""
2
- __version__ = "1.3.2"
2
+ __version__ = "1.3.4"
@@ -993,9 +993,19 @@ class WorkspaceManager:
993
993
  os.write(fd, b"\n")
994
994
  os.close(fd)
995
995
  os.chmod(key_path, stat_mod.S_IRUSR)
996
+ # On Windows, convert backslashes to forward slashes and quote
997
+ # the path. Git invokes GIT_SSH_COMMAND via MSYS2 shell which
998
+ # interprets backslashes as escape sequences, corrupting the
999
+ # path (e.g. C:\Users → C:Users).
1000
+ key_path_safe = key_path.replace("\\", "/") if sys.platform == "win32" else key_path
996
1001
  env = {
997
1002
  **os.environ,
998
- "GIT_SSH_COMMAND": f"ssh -i {key_path} -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null",
1003
+ "GIT_SSH_COMMAND": (
1004
+ f'ssh -i "{key_path_safe}"'
1005
+ f" -o StrictHostKeyChecking=accept-new"
1006
+ f" -o UserKnownHostsFile=/dev/null"
1007
+ f" -o IdentitiesOnly=yes"
1008
+ ),
999
1009
  }
1000
1010
  except Exception:
1001
1011
  try:
@@ -1031,10 +1041,12 @@ class WorkspaceManager:
1031
1041
  # Clean up temp SSH key file if created
1032
1042
  if env and "GIT_SSH_COMMAND" in env:
1033
1043
  import re as _re
1034
- m = _re.search(r"-i\s+(\S+)", env["GIT_SSH_COMMAND"])
1044
+ m = _re.search(r'-i\s+"?([^"\s]+)"?', env["GIT_SSH_COMMAND"])
1035
1045
  if m:
1046
+ key_file = m.group(1)
1047
+ # Resolve forward-slash path back to native for unlink
1036
1048
  try:
1037
- os.unlink(m.group(1))
1049
+ os.unlink(key_file)
1038
1050
  except OSError:
1039
1051
  pass
1040
1052
  if proc.returncode != 0:
@@ -1422,10 +1434,19 @@ class ProcessManager:
1422
1434
  while True:
1423
1435
  try:
1424
1436
  line_bytes = await proc.stdout.readline()
1425
- except ValueError:
1426
- # Line exceeded stream buffer limit – fall back to reading
1427
- # remaining data in bulk to avoid losing output.
1428
- remaining = await proc.stdout.read()
1437
+ except (ValueError, asyncio.LimitOverrunError, Exception) as exc:
1438
+ # Line exceeded stream buffer limit (LimitOverrunError
1439
+ # converted to ValueError by readline(), but catch broadly
1440
+ # to handle edge cases in different Python versions).
1441
+ # Fall back to reading remaining data in bulk.
1442
+ logger.warning(
1443
+ "Stream read error for task %s (%s: %s), draining remaining output",
1444
+ task_id, type(exc).__name__, exc,
1445
+ )
1446
+ try:
1447
+ remaining = await proc.stdout.read()
1448
+ except Exception:
1449
+ remaining = b""
1429
1450
  if remaining:
1430
1451
  for chunk_line in remaining.decode(errors="replace").split("\n"):
1431
1452
  if chunk_line:
@@ -1499,7 +1520,7 @@ class ProcessManager:
1499
1520
  stdin=asyncio.subprocess.PIPE,
1500
1521
  cwd=str(cwd),
1501
1522
  env=env,
1502
- limit=10 * 1024 * 1024, # 10MB line buffer for large JSON output
1523
+ limit=100 * 1024 * 1024, # 100MB line buffer for large JSON output from long sessions
1503
1524
  )
1504
1525
  self.active_processes[task_id] = proc
1505
1526
  stdout, stderr, returncode = await self._stream_process(
@@ -1533,6 +1554,17 @@ class ProcessManager:
1533
1554
  status="failed", exit_code=-1, stdout="", stderr="",
1534
1555
  error=f"Timed out after {timeout}s",
1535
1556
  )
1557
+ except Exception as exc:
1558
+ logger.exception("Claude stream error for task %s", task_id)
1559
+ if task_id in self.active_processes:
1560
+ try:
1561
+ self.active_processes[task_id].kill()
1562
+ except Exception:
1563
+ pass
1564
+ return TaskResult(
1565
+ status="failed", exit_code=-1, stdout="", stderr="",
1566
+ error=f"Stream processing error: {exc}",
1567
+ )
1536
1568
  finally:
1537
1569
  self.active_processes.pop(task_id, None)
1538
1570
 
@@ -1600,7 +1632,7 @@ class ProcessManager:
1600
1632
  stderr=asyncio.subprocess.PIPE,
1601
1633
  stdin=asyncio.subprocess.PIPE if stdin_input else None,
1602
1634
  cwd=str(cwd),
1603
- limit=10 * 1024 * 1024, # 10MB line buffer for large agent output
1635
+ limit=100 * 1024 * 1024, # 100MB line buffer for large agent output
1604
1636
  )
1605
1637
  self.active_processes[task_id] = proc
1606
1638
  stdin_bytes = stdin_input.encode() if stdin_input else None
@@ -1622,6 +1654,17 @@ class ProcessManager:
1622
1654
  status="failed", exit_code=-1, stdout="", stderr="",
1623
1655
  error=f"Timed out after {timeout}s",
1624
1656
  )
1657
+ except Exception as exc:
1658
+ logger.exception("CLI stream error for task %s", task_id)
1659
+ if task_id in self.active_processes:
1660
+ try:
1661
+ self.active_processes[task_id].kill()
1662
+ except Exception:
1663
+ pass
1664
+ return TaskResult(
1665
+ status="failed", exit_code=-1, stdout="", stderr="",
1666
+ error=f"Stream processing error: {exc}",
1667
+ )
1625
1668
  finally:
1626
1669
  self.active_processes.pop(task_id, None)
1627
1670
 
@@ -2499,40 +2542,118 @@ class RuntimeDaemon:
2499
2542
  CLI starts, etc.
2500
2543
  """
2501
2544
  lock_path = Path.home() / ".forgexa" / "daemon" / "daemon.lock"
2545
+ pid_path = Path.home() / ".forgexa" / "daemon" / "daemon.pid"
2502
2546
  lock_path.parent.mkdir(parents=True, exist_ok=True)
2503
2547
 
2504
2548
  if sys.platform == "win32":
2505
- # Windows: use msvcrt file locking
2549
+ # Windows: use msvcrt file locking.
2550
+ #
2551
+ # IMPORTANT: msvcrt.locking() creates mandatory byte-range locks
2552
+ # that prevent OTHER processes from reading the locked bytes.
2553
+ # Therefore we store the PID in a separate daemon.pid file that
2554
+ # is never locked, so we can always read the old daemon's PID.
2506
2555
  import msvcrt
2507
2556
 
2557
+ # ── Step 1: read old PID BEFORE touching the lock file ──
2558
+ old_pid = None
2559
+ try:
2560
+ if pid_path.exists():
2561
+ old_pid = int(pid_path.read_text().strip())
2562
+ except (ValueError, OSError):
2563
+ pass
2564
+
2565
+ # ── Step 2: try to acquire the lock ──
2508
2566
  self._lock_file = open(lock_path, "w")
2509
2567
  try:
2510
2568
  msvcrt.locking(self._lock_file.fileno(), msvcrt.LK_NBLCK, 1)
2511
2569
  except (IOError, OSError):
2512
- # Lock held — try to kill old process via PID file
2513
- try:
2514
- old_pid = int(lock_path.read_text().strip())
2515
- logger.warning("Another daemon is running (PID %d). Terminating...", old_pid)
2516
- import subprocess as _sp
2517
- _sp.run(["taskkill", "/PID", str(old_pid), "/F"],
2570
+ # Lock held by another daemon — kill it
2571
+ import subprocess as _sp
2572
+
2573
+ if old_pid and old_pid != os.getpid():
2574
+ logger.warning("Another daemon (PID %d) holds the lock. Killing...", old_pid)
2575
+ _sp.run(["taskkill", "/PID", str(old_pid), "/F", "/T"],
2518
2576
  capture_output=True)
2519
- time.sleep(1)
2520
- except (ValueError, FileNotFoundError, PermissionError, OSError):
2521
- pass
2577
+ else:
2578
+ # No daemon.pid or PID matches us — find by process enumeration.
2579
+ # Uses PowerShell Get-CimInstance (reliable on all modern Windows).
2580
+ # wmic is deprecated since Windows 10 21H2 / Windows 11.
2581
+ logger.warning("No daemon PID file; killing daemon by process enumeration...")
2582
+ try:
2583
+ ps_script = (
2584
+ "Get-CimInstance Win32_Process | "
2585
+ "Where-Object { "
2586
+ "($_.CommandLine -like '*daemon.py*' -or $_.Name -eq 'forgexa-daemon.exe') "
2587
+ "-and $_.ProcessId -ne " + str(os.getpid()) + " } | "
2588
+ "Select-Object -ExpandProperty ProcessId"
2589
+ )
2590
+ result = _sp.run(
2591
+ ["powershell", "-NoProfile", "-NonInteractive", "-Command", ps_script],
2592
+ capture_output=True, text=True, timeout=15)
2593
+ for line in result.stdout.strip().splitlines():
2594
+ line = line.strip()
2595
+ if line.isdigit():
2596
+ pid = int(line)
2597
+ if pid != os.getpid():
2598
+ logger.info("Killing orphan daemon process (PID %d)", pid)
2599
+ _sp.run(["taskkill", "/PID", str(pid), "/F", "/T"],
2600
+ capture_output=True)
2601
+ except Exception as e:
2602
+ logger.debug("Process enumeration fallback failed: %s", e)
2603
+
2604
+ # Wait for process to fully terminate and release file handles.
2605
+ # Verify death before proceeding (Windows needs time to release handles).
2606
+ time.sleep(2)
2607
+ if old_pid and old_pid != os.getpid():
2608
+ for _ in range(6): # Up to 3 more seconds
2609
+ try:
2610
+ result = _sp.run(
2611
+ ["tasklist", "/FI", f"PID eq {old_pid}", "/NH", "/FO", "CSV"],
2612
+ capture_output=True, text=True, timeout=5)
2613
+ if str(old_pid) not in result.stdout:
2614
+ break
2615
+ except Exception:
2616
+ break
2617
+ time.sleep(0.5)
2522
2618
 
2523
- # Retry
2619
+ # Close our handle and remove stale lock file
2524
2620
  self._lock_file.close()
2525
- self._lock_file = open(lock_path, "w")
2526
2621
  try:
2527
- msvcrt.locking(self._lock_file.fileno(), msvcrt.LK_NBLCK, 1)
2528
- except (IOError, OSError):
2622
+ lock_path.unlink(missing_ok=True)
2623
+ except OSError:
2624
+ pass
2625
+
2626
+ # Retry with backoff — up to 5 attempts (total ~15s)
2627
+ acquired = False
2628
+ for attempt in range(5):
2629
+ try:
2630
+ self._lock_file = open(lock_path, "w")
2631
+ msvcrt.locking(self._lock_file.fileno(), msvcrt.LK_NBLCK, 1)
2632
+ acquired = True
2633
+ break
2634
+ except (IOError, OSError):
2635
+ self._lock_file.close()
2636
+ wait = (attempt + 1) * 1 # 1s, 2s, 3s, 4s, 5s
2637
+ logger.warning("Lock retry %d/5 failed, waiting %ds...", attempt + 1, wait)
2638
+ time.sleep(wait)
2639
+
2640
+ if not acquired:
2529
2641
  logger.error("Cannot acquire daemon lock — another instance may still be running")
2530
2642
  raise SystemExit(1)
2531
2643
 
2644
+ # Write PID to lock file (for reference, though unreadable while locked)
2532
2645
  self._lock_file.seek(0)
2533
2646
  self._lock_file.truncate()
2534
2647
  self._lock_file.write(str(os.getpid()))
2535
2648
  self._lock_file.flush()
2649
+
2650
+ # Write PID to separate unlocked file — always readable by other
2651
+ # processes (Rust manager, NSIS installer, next daemon instance).
2652
+ try:
2653
+ pid_path.write_text(str(os.getpid()))
2654
+ except OSError as e:
2655
+ logger.warning("Could not write daemon.pid: %s", e)
2656
+
2536
2657
  logger.info("Acquired exclusive daemon lock (pid=%d)", os.getpid())
2537
2658
  return
2538
2659
 
@@ -2582,6 +2703,12 @@ class RuntimeDaemon:
2582
2703
  self._lock_file.flush()
2583
2704
  logger.info("Acquired exclusive daemon lock (pid=%d)", os.getpid())
2584
2705
 
2706
+ # Write PID to separate file for consistency with Windows path
2707
+ try:
2708
+ pid_path.write_text(str(os.getpid()))
2709
+ except OSError:
2710
+ pass
2711
+
2585
2712
  # Also clean up CLI daemon PID file if it points to a dead process
2586
2713
  cli_pid_file = Path.home() / ".forgexa-daemon.pid"
2587
2714
  if cli_pid_file.exists():
@@ -2623,20 +2750,37 @@ class RuntimeDaemon:
2623
2750
  ", ".join(a.agent_id for a in self.agents))
2624
2751
 
2625
2752
  # 2. Register with all servers
2626
- for url in self.server_urls:
2627
- conn = ServerConnection(url, self.api_token, self.daemon_id, self.hardware_id)
2628
- try:
2629
- await conn.register(self.agents, self.max_concurrent)
2630
- conn.start_services(self.heartbeat_interval, self.poll_interval, self.agents)
2631
- await conn.start_heartbeat()
2632
- self.connections.append(conn)
2633
- logger.info("[%s] Connected and ready", conn.label)
2634
- except Exception as e:
2635
- logger.error("[%s] Failed to connect: %s — skipping this server", conn.label, e)
2636
- await conn.client.aclose()
2753
+ # 2. Register with all servers (with retry on transient failures)
2754
+ max_registration_attempts = 5
2755
+ for attempt in range(max_registration_attempts):
2756
+ for url in self.server_urls:
2757
+ if any(c.server_url == url.rstrip("/") for c in self.connections):
2758
+ continue # Already connected to this server
2759
+ conn = ServerConnection(url, self.api_token, self.daemon_id, self.hardware_id)
2760
+ try:
2761
+ await conn.register(self.agents, self.max_concurrent)
2762
+ conn.start_services(self.heartbeat_interval, self.poll_interval, self.agents)
2763
+ await conn.start_heartbeat()
2764
+ self.connections.append(conn)
2765
+ logger.info("[%s] Connected and ready", conn.label)
2766
+ except Exception as e:
2767
+ logger.error("[%s] Failed to connect: %s — skipping this server", conn.label, e)
2768
+ await conn.client.aclose()
2769
+
2770
+ if self.connections:
2771
+ break # At least one server connected
2772
+
2773
+ if attempt < max_registration_attempts - 1:
2774
+ wait = (attempt + 1) * 5 # 5s, 10s, 15s, 20s
2775
+ logger.warning(
2776
+ "No servers reachable (attempt %d/%d). Retrying in %ds...",
2777
+ attempt + 1, max_registration_attempts, wait,
2778
+ )
2779
+ await asyncio.sleep(wait)
2637
2780
 
2638
2781
  if not self.connections:
2639
- logger.error("Failed to connect to any server. Exiting.")
2782
+ logger.error("Failed to connect to any server after %d attempts. Exiting.",
2783
+ max_registration_attempts)
2640
2784
  raise SystemExit(1)
2641
2785
 
2642
2786
  logger.info("Daemon ready. Connected to %d server(s). Polling for tasks...",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: forgexa-cli
3
- Version: 1.3.2
3
+ Version: 1.3.4
4
4
  Summary: Forgexa CLI — command-line client and AI agent runtime for the Forgexa platform
5
5
  Author-email: Jason Sun <dev.winds@gmail.com>
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "forgexa-cli"
3
- version = "1.3.2"
3
+ version = "1.3.4"
4
4
  description = "Forgexa CLI — command-line client and AI agent runtime for the Forgexa platform"
5
5
  requires-python = ">=3.9"
6
6
  license = { text = "MIT" }
File without changes
File without changes