PyPI - debugger-help - Versions diffs - 3.0.2__tar.gz → 4.0.0__tar.gz - Mend

debugger-help 3.0.2 (tar.gz) → 4.0.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

{debugger_help-3.0.2 → debugger_help-4.0.0}/PKG-INFO RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: debugger-help
-Version: 3.0.2
+Version: 4.0.0
 Summary: debugger.help VPS Agent — Deep system monitoring for logs, GPU, PM2, Docker, and more
 Author: debugger.help
 License: MIT

{debugger_help-3.0.2 → debugger_help-4.0.0}/debugger_help/__init__.py RENAMED

@@ -1,2 +1,2 @@
 """debugger.help VPS Agent — Deep system monitoring."""
-__version__ = "3.0.2"
+__version__ = "4.0.0"

{debugger_help-3.0.2 → debugger_help-4.0.0}/debugger_help/agent.py RENAMED

@@ -1,29 +1,11 @@
 #!/usr/bin/env python3
 """
-debugger.help VPS Agent v3 — Ultimate Deep Debugger
-Captures EVERYTHING from your VPS:
-- pm2 logs, status, process list, restart counts
-- GPU: VRAM, temp, power, utilization, ECC errors, throttling, driver version, CUDA version
-- CPU: per-core usage, load average, frequency, context switches
-- Memory: RAM, swap, shared, buffers, cached
-- Disk: usage per mount, I/O read/write rates, inode usage
-- Network: per-interface stats, connection counts by state, open ports, DNS resolution
-- Processes: top CPU/memory consumers, zombie/defunct processes, open file descriptors
-- Docker containers (if running): status, CPU, memory, restart counts
-- Systemd services: failed units
-- SSL certificates: expiry checks
-- File watchers: key log files (syslog, dmesg, pm2 logs, custom)
-- Image generation: Flux/SDXL/ComfyUI pipeline errors, model load times, inference timing
-- Environment: all relevant env vars (sanitized), Python packages, Node packages
-Usage:
-    pip install psutil requests
-    # Optional: pip install pynvml docker
-    export DEBUGGER_API_KEY="sk_your_api_key_here"
-    export DEBUGGER_INGEST_URL="https://your-project.supabase.co/functions/v1/ingest"
-    python debugger_agent.py
+debugger.help VPS Agent v4.0.0 — Action-Based Execution
+Key changes from v3:
+- Action-based command system: only predefined actions execute, no raw commands
+- Fixed error/warning log flood: deduplication + excluded agent's own logs
+- Deterministic execution: AI selects from action registry, agent maps to commands
 """
 import os
@@ -67,7 +49,7 @@ try:
         GPU_DRIVER_VERSION = GPU_DRIVER_VERSION.decode()
     try:
         GPU_CUDA_VERSION = pynvml.nvmlSystemGetCudaDriverVersion_v2()
-        GPU_CUDA_VERSION = f"{GPU_CUDA_VERSION // 1000}.{(GPU_CUDA_VERSION % 1000) // 10}"
+        GPU_CUDA_VERSION = "{}.{}".format(GPU_CUDA_VERSION // 1000, (GPU_CUDA_VERSION % 1000) // 10)
     except Exception:
         GPU_CUDA_VERSION = "unknown"
 except (ImportError, Exception):
@@ -93,10 +75,13 @@ logger = logging.getLogger("debugger-agent")
 # Configuration
 API_KEY = os.environ.get("DEBUGGER_API_KEY", "")
 INGEST_URL = os.environ.get("DEBUGGER_INGEST_URL", "")
-SOURCE_NAME = os.environ.get("DEBUGGER_SOURCE", f"vps-{socket.gethostname()}")
+SOURCE_NAME = os.environ.get("DEBUGGER_SOURCE", "vps-{}".format(socket.gethostname()))
 PLATFORM = os.environ.get("DEBUGGER_PLATFORM", "Python (VPS)")
 INTERVAL = int(os.environ.get("DEBUGGER_INTERVAL", "10"))
-VERSION = "3.0.2"
+VERSION = "4.0.0"
+# Derive poll-commands URL from ingest URL
+POLL_COMMANDS_URL = INGEST_URL.replace("/ingest", "/poll-commands") if INGEST_URL else ""
 # Additional log files to watch
 WATCH_LOG_FILES = [
@@ -123,22 +108,24 @@ session.mount("https://", HTTPAdapter(max_retries=retry))
 session.mount("http://", HTTPAdapter(max_retries=retry))
 HEADERS = {
-    "Authorization": f"Bearer {API_KEY}",
+    "Authorization": "Bearer {}".format(API_KEY),
     "Content-Type": "application/json",
 }
-def send(payload: dict) -> bool:
+def send(payload):
     """Send payload with auto-retry."""
     try:
         resp = session.post(INGEST_URL, json=payload, headers=HEADERS, timeout=15)
+        if resp.status_code != 200:
+            logger.warning("Send failed [%s %s]: %s", payload.get("type"), resp.status_code, resp.text[:200])
         return resp.status_code == 200
     except Exception as e:
-        logger.debug(f"Send failed (will retry): {e}")
+        logger.warning("Send error [%s]: %s", payload.get("type"), e)
         return False
-def run_cmd(cmd: str, timeout: int = 10) -> str:
+def run_cmd(cmd, timeout=10):
     """Run shell command and return output."""
     try:
         result = subprocess.run(
@@ -146,26 +133,64 @@ def run_cmd(cmd: str, timeout: int = 10) -> str:
         )
         return (result.stdout + result.stderr).strip()
     except subprocess.TimeoutExpired:
-        return f"[timeout after {timeout}s]"
+        return "[timeout after {}s]".format(timeout)
     except Exception as e:
-        return f"[error: {e}]"
+        return "[error: {}]".format(e)
+# =============================================================================
+# ACTION REGISTRY — the ONLY commands that can be executed remotely
+# =============================================================================
+ACTION_COMMANDS = {
+    "check_gpu": "nvidia-smi",
+    "check_gpu_detailed": "nvidia-smi --query-gpu=memory.used,memory.total,utilization.gpu,temperature.gpu --format=csv,noheader,nounits",
+    "check_docker": "systemctl status docker",
+    "restart_docker": "sudo systemctl restart docker",
+    "check_comfy": "pm2 logs yourstudio-gpu --nostream --lines 100 2>/dev/null || pm2 logs --nostream --lines 100 2>/dev/null",
+    "restart_comfy": "pm2 restart yourstudio-gpu 2>/dev/null || pm2 restart all",
+    "check_processes": "ps aux --sort=-%cpu | head -30",
+    "check_ports": "ss -tulnp",
+    "check_health": "curl -fsS --connect-timeout 5 --max-time 10 http://127.0.0.1:8188/health 2>&1 || echo 'Health check failed'",
+    "check_disk": "df -h",
+    "check_memory": "free -h",
+    "check_uptime": "uptime",
+    "check_pm2": "pm2 list",
+    "check_systemd": "systemctl --failed --no-pager --plain 2>/dev/null | head -20",
+    "check_journal_errors": "journalctl -p err --since '1 hour ago' --no-pager -q 2>/dev/null | tail -50",
+    "check_dmesg": "dmesg --time-format iso -T 2>/dev/null | tail -50",
+    "check_network": "ip addr show",
+}
+# Actions that are never auto-executed even if the server says so
+DANGEROUS_ACTIONS = {"restart_docker", "restart_comfy"}
-# --- Stdout/Stderr Capture ---
+# =============================================================================
+# Stdout/Stderr Capture (with deduplication)
+# =============================================================================
 class StreamCapture(io.TextIOBase):
-    """Captures writes to stdout/stderr and sends them as logs."""
+    """Captures writes to stdout/stderr and buffers them for batch sending."""
     def __init__(self, original_stream, level="info", max_buffer=500):
         self.original = original_stream
         self.level = level
         self.buffer = deque(maxlen=max_buffer)
+        self.pending = deque(maxlen=100)
         self.lock = threading.Lock()
+        self._sending = False
+        self._recent_hashes = deque(maxlen=200)
     def write(self, text):
         self.original.write(text)
-        if text.strip():
+        if text.strip() and not self._sending:
+            msg_hash = hash(text.strip()[:200])
             with self.lock:
+                # Deduplicate: skip if we've seen this exact message recently
+                if msg_hash in self._recent_hashes:
+                    return len(text)
+                self._recent_hashes.append(msg_hash)
                 self.buffer.append(text.strip())
                 lower = text.lower()
                 detected_level = self.level
@@ -176,29 +201,43 @@ class StreamCapture(io.TextIOBase):
                     detected_level = "error"
                 elif any(kw in lower for kw in ["warning", "warn", "deprecat"]):
                     detected_level = "warn"
+                self.pending.append((detected_level, text.strip()[:2000]))
+        return len(text)
+    def flush(self):
+        self.original.flush()
+    def flush_pending(self):
+        """Send buffered messages from main loop to avoid recursion."""
+        items = []
+        with self.lock:
+            while self.pending:
+                items.append(self.pending.popleft())
+        self._sending = True
+        try:
+            for level, msg in items:
                 send({
                     "type": "log",
                     "source": SOURCE_NAME,
                     "platform": PLATFORM,
                     "version": VERSION,
-                    "level": detected_level,
-                    "message": f"[std{self.level}] {text.strip()[:2000]}",
-                    "context": {"capturedFrom": f"std{self.level}"},
+                    "level": level,
+                    "message": "[std{}] {}".format(self.level, msg),
+                    "context": {"capturedFrom": "std{}".format(self.level)},
                 })
-        return len(text)
-    def flush(self):
-        self.original.flush()
+        finally:
+            self._sending = False
     def get_recent(self, n=100):
         with self.lock:
             return list(self.buffer)[-n:]
-# --- GPU Metrics (Deep) ---
+# =============================================================================
+# GPU Metrics (Deep)
+# =============================================================================
-def get_gpu_metrics() -> dict:
+def get_gpu_metrics():
     if not GPU_AVAILABLE:
         return {}
     try:
@@ -227,45 +266,33 @@ def get_gpu_metrics() -> dict:
                 "power_w": round(power, 1),
             }
-            # Power limit
             try:
                 power_limit = pynvml.nvmlDeviceGetPowerManagementLimit(handle) / 1000
                 gpu_info["power_limit_w"] = round(power_limit, 1)
             except Exception:
                 pass
-            # Throttle reasons
             try:
                 throttle = pynvml.nvmlDeviceGetCurrentClocksThrottleReasons(handle)
                 reasons = []
-                if throttle & 0x0000000000000002:
-                    reasons.append("idle")
-                if throttle & 0x0000000000000004:
-                    reasons.append("app_clocks")
-                if throttle & 0x0000000000000008:
-                    reasons.append("sw_power_cap")
-                if throttle & 0x0000000000000020:
-                    reasons.append("hw_slowdown")
-                if throttle & 0x0000000000000040:
-                    reasons.append("sync_boost")
-                if throttle & 0x0000000000000080:
-                    reasons.append("sw_thermal")
-                if throttle & 0x0000000000000100:
-                    reasons.append("hw_thermal")
-                if throttle & 0x0000000000000200:
-                    reasons.append("hw_power_brake")
+                if throttle & 0x0000000000000002: reasons.append("idle")
+                if throttle & 0x0000000000000004: reasons.append("app_clocks")
+                if throttle & 0x0000000000000008: reasons.append("sw_power_cap")
+                if throttle & 0x0000000000000020: reasons.append("hw_slowdown")
+                if throttle & 0x0000000000000040: reasons.append("sync_boost")
+                if throttle & 0x0000000000000080: reasons.append("sw_thermal")
+                if throttle & 0x0000000000000100: reasons.append("hw_thermal")
+                if throttle & 0x0000000000000200: reasons.append("hw_power_brake")
                 gpu_info["throttle_reasons"] = reasons if reasons else ["none"]
             except Exception:
                 pass
-            # Clock speeds
             try:
                 gpu_info["clock_graphics_mhz"] = pynvml.nvmlDeviceGetClockInfo(handle, pynvml.NVML_CLOCK_GRAPHICS)
                 gpu_info["clock_mem_mhz"] = pynvml.nvmlDeviceGetClockInfo(handle, pynvml.NVML_CLOCK_MEM)
             except Exception:
                 pass
-            # Running processes on GPU
             try:
                 procs = pynvml.nvmlDeviceGetComputeRunningProcesses(handle)
                 gpu_procs = []
@@ -284,7 +311,6 @@ def get_gpu_metrics() -> dict:
             except Exception:
                 pass
-            # ECC errors
             try:
                 ecc_single = pynvml.nvmlDeviceGetTotalEccErrors(handle, pynvml.NVML_SINGLE_BIT_ECC, pynvml.NVML_VOLATILE_ECC)
                 ecc_double = pynvml.nvmlDeviceGetTotalEccErrors(handle, pynvml.NVML_DOUBLE_BIT_ECC, pynvml.NVML_VOLATILE_ECC)
@@ -300,7 +326,6 @@ def get_gpu_metrics() -> dict:
             "cuda_version": GPU_CUDA_VERSION,
             "device_count": device_count,
             "gpus": gpus,
-            # Flatten primary GPU for metrics table
             "gpu_temp": gpus[0]["temp_c"] if gpus else None,
             "gpu_vram": gpus[0]["vram_used_gb"] if gpus else None,
         }
@@ -308,10 +333,11 @@ def get_gpu_metrics() -> dict:
         return {"error": str(e)}
-# --- PM2 Integration ---
+# =============================================================================
+# PM2 Integration
+# =============================================================================
-def get_pm2_status() -> dict:
-    """Get full pm2 process list with details."""
+def get_pm2_status():
     raw = run_cmd("pm2 jlist 2>/dev/null")
     if not raw or raw.startswith("[error") or raw.startswith("[timeout"):
         return {"available": False, "raw": raw}
@@ -340,37 +366,22 @@ def get_pm2_status() -> dict:
                 "instances": env.get("instances"),
                 "exit_code": env.get("exit_code"),
             }
-            # Get recent logs for this process
-            recent_logs = run_cmd(f"pm2 logs {p.get('name')} --nostream --lines 30 2>/dev/null", timeout=5)
-            if recent_logs and not recent_logs.startswith("["):
-                proc_info["recent_logs"] = recent_logs[-3000:]  # Last 3KB
-            # Get recent error logs
-            err_log_path = env.get("pm_err_log_path")
-            if err_log_path and os.path.exists(err_log_path):
-                try:
-                    with open(err_log_path, "r") as f:
-                        lines = f.readlines()
-                        proc_info["recent_errors"] = "".join(lines[-50:])[-3000:]
-                except Exception:
-                    pass
             result["processes"].append(proc_info)
         return result
     except json.JSONDecodeError:
         return {"available": True, "raw_output": raw[:2000]}
-def get_pm2_logs(lines: int = 100) -> str:
-    """Get combined pm2 logs."""
-    output = run_cmd(f"pm2 logs --nostream --lines {lines} 2>/dev/null", timeout=10)
+def get_pm2_logs(lines=100):
+    output = run_cmd("pm2 logs --nostream --lines {} 2>/dev/null".format(lines), timeout=10)
     return output[-5000:] if output else ""
-# --- System Metrics (Deep) ---
+# =============================================================================
+# System Metrics (Deep)
+# =============================================================================
-def get_disk_io() -> dict:
-    """Get disk I/O stats."""
+def get_disk_io():
     try:
         io_counters = psutil.disk_io_counters(perdisk=False)
         if io_counters:
@@ -387,11 +398,8 @@ def get_disk_io() -> dict:
     return {}
-def get_network_details() -> dict:
-    """Get detailed network info."""
+def get_network_details():
     result = {}
-    # Per-interface stats
     try:
         net_io = psutil.net_io_counters(pernic=True)
         interfaces = {}
@@ -412,7 +420,6 @@ def get_network_details() -> dict:
     except Exception:
         pass
-    # Connection states
     try:
         connections = psutil.net_connections(kind="tcp")
         states = {}
@@ -424,7 +431,6 @@ def get_network_details() -> dict:
     except Exception:
         pass
-    # Listening ports
     try:
         listening = []
         for conn in psutil.net_connections(kind="tcp"):
@@ -438,7 +444,6 @@ def get_network_details() -> dict:
     except Exception:
         pass
-    # DNS check
     try:
         start = time.time()
         socket.getaddrinfo("google.com", 80)
@@ -449,8 +454,7 @@ def get_network_details() -> dict:
     return result
-def get_top_processes(n: int = 15) -> list:
-    """Get top N processes by CPU and memory."""
+def get_top_processes(n=15):
     procs = []
     for p in psutil.process_iter(["pid", "name", "cpu_percent", "memory_percent", "status", "num_fds", "num_threads", "create_time", "cmdline"]):
         try:
@@ -473,8 +477,7 @@ def get_top_processes(n: int = 15) -> list:
     return procs[:n]
-def get_zombie_processes() -> list:
-    """Find zombie/defunct processes."""
+def get_zombie_processes():
     zombies = []
     for p in psutil.process_iter(["pid", "name", "status", "ppid"]):
         try:
@@ -489,8 +492,7 @@ def get_zombie_processes() -> list:
     return zombies
-def get_disk_details() -> list:
-    """Get all mount point usage and inode info."""
+def get_disk_details():
     mounts = []
     for part in psutil.disk_partitions(all=False):
         try:
@@ -504,9 +506,8 @@ def get_disk_details() -> list:
                 "free_gb": round(usage.free / 1e9, 1),
                 "pct": usage.percent,
             }
-            # Inode usage (Linux)
             try:
-                inode_output = run_cmd(f"df -i {part.mountpoint} | tail -1", timeout=3)
+                inode_output = run_cmd("df -i {} | tail -1".format(part.mountpoint), timeout=3)
                 parts = inode_output.split()
                 if len(parts) >= 5:
                     info["inodes_used"] = parts[2]
@@ -520,12 +521,10 @@ def get_disk_details() -> list:
     return mounts
-def get_systemd_failed() -> list:
-    """Get failed systemd units."""
+def get_systemd_failed():
     output = run_cmd("systemctl --failed --no-pager --plain 2>/dev/null", timeout=5)
     if not output or "0 loaded" in output:
         return []
     failed = []
     for line in output.split("\n"):
         line = line.strip()
@@ -536,8 +535,7 @@ def get_systemd_failed() -> list:
     return failed[:20]
-def get_docker_status() -> dict:
-    """Get Docker container info."""
+def get_docker_status():
     if not DOCKER_AVAILABLE:
         return {"available": False}
     try:
@@ -559,7 +557,6 @@ def get_docker_status() -> dict:
                     }
                 except Exception:
                     pass
             containers.append({
                 "name": c.name,
                 "image": c.image.tags[0] if c.image.tags else str(c.image.id)[:12],
@@ -572,8 +569,7 @@ def get_docker_status() -> dict:
         return {"available": False, "error": str(e)}
-def check_ssl_certs() -> list:
-    """Check SSL certificate expiry for configured domains."""
+def check_ssl_certs():
     results = []
     for domain in SSL_CHECK_DOMAINS:
         try:
@@ -596,45 +592,24 @@ def check_ssl_certs() -> list:
     return results
-def get_system_logs() -> dict:
-    """Get recent system logs (dmesg, syslog, auth)."""
+def get_system_logs():
     logs = {}
-    # dmesg (kernel messages — GPU errors, OOM kills show up here)
     dmesg = run_cmd("dmesg --time-format iso -T 2>/dev/null | tail -50", timeout=5)
     if dmesg and not dmesg.startswith("[error"):
         logs["dmesg"] = dmesg[-3000:]
-    # Check for OOM kills specifically
     oom = run_cmd("dmesg | grep -i 'oom\\|killed process\\|out of memory' | tail -10 2>/dev/null", timeout=5)
     if oom and not oom.startswith("[error"):
         logs["oom_kills"] = oom
-    # journalctl recent errors
     journal = run_cmd("journalctl -p err --since '1 hour ago' --no-pager -q 2>/dev/null | tail -30", timeout=5)
     if journal and not journal.startswith("[error"):
         logs["journal_errors"] = journal[-2000:]
-    # Auth log (failed logins, suspicious activity)
     auth = run_cmd("tail -20 /var/log/auth.log 2>/dev/null || tail -20 /var/log/secure 2>/dev/null", timeout=5)
     if auth and not auth.startswith("[error"):
         logs["auth_log"] = auth[-1000:]
     return logs
-def get_firewall_rules() -> str:
-    """Get firewall rules summary."""
-    # Try ufw first, then iptables
-    ufw = run_cmd("ufw status verbose 2>/dev/null", timeout=5)
-    if ufw and "Status:" in ufw:
-        return ufw[:2000]
-    ipt = run_cmd("iptables -L -n --line-numbers 2>/dev/null | head -40", timeout=5)
-    return ipt[:2000] if ipt else ""
-def get_env_sanitized() -> dict:
-    """Get relevant environment variables with secrets redacted."""
+def get_env_sanitized():
     relevant_prefixes = [
         "NODE_", "PYTHON", "PATH", "HOME", "USER", "SHELL", "LANG",
         "CUDA", "NVIDIA", "GPU", "LD_LIBRARY", "VIRTUAL_ENV", "CONDA",
@@ -643,7 +618,6 @@ def get_env_sanitized() -> dict:
     env = {}
     for key, value in os.environ.items():
         if any(key.startswith(p) for p in relevant_prefixes):
-            # Redact anything that looks like a secret
             if any(s in key.upper() for s in ["KEY", "SECRET", "TOKEN", "PASS", "AUTH"]):
                 env[key] = "[REDACTED]"
             else:
@@ -651,19 +625,14 @@ def get_env_sanitized() -> dict:
     return env
-def get_installed_packages() -> dict:
-    """Get installed Python and Node packages."""
+def get_installed_packages():
     pkgs = {}
-    # Python packages
     pip_list = run_cmd("pip list --format=json 2>/dev/null", timeout=10)
     if pip_list and pip_list.startswith("["):
         try:
             pkgs["python"] = {p["name"]: p["version"] for p in json.loads(pip_list)}
         except Exception:
             pass
-    # Node packages (global)
     npm_list = run_cmd("npm list -g --depth=0 --json 2>/dev/null", timeout=10)
     if npm_list and npm_list.startswith("{"):
         try:
@@ -671,27 +640,32 @@ def get_installed_packages() -> dict:
             pkgs["node_global"] = {k: v.get("version", "?") for k, v in deps.items()}
         except Exception:
             pass
-    # pm2 ecosystem config
     pm2_conf = run_cmd("cat ecosystem.config.js 2>/dev/null || cat ecosystem.config.cjs 2>/dev/null", timeout=3)
     if pm2_conf and not pm2_conf.startswith("[error"):
         pkgs["pm2_ecosystem"] = pm2_conf[:3000]
     return pkgs
-# --- File Watcher Thread ---
+def get_firewall_rules():
+    ufw = run_cmd("ufw status verbose 2>/dev/null", timeout=5)
+    if ufw and "Status:" in ufw:
+        return ufw[:2000]
+    ipt = run_cmd("iptables -L -n --line-numbers 2>/dev/null | head -40", timeout=5)
+    return ipt[:2000] if ipt else ""
-class LogFileWatcher(threading.Thread):
-    """Watch log files for new lines and send them."""
-    def __init__(self, files: list):
+# =============================================================================
+# File Watcher Thread (with deduplication + agent log exclusion)
+# =============================================================================
+class LogFileWatcher(threading.Thread):
+    def __init__(self, files):
         super().__init__(daemon=True)
         self.files = files
         self.positions = {}
+        self._recent_hashes = deque(maxlen=500)
     def run(self):
-        # Initialize positions to end of files
         for f in self.files:
             try:
                 self.positions[f] = os.path.getsize(f)
@@ -705,10 +679,16 @@ class LogFileWatcher(threading.Thread):
                     if size > self.positions.get(filepath, 0):
                         with open(filepath, "r") as fh:
                             fh.seek(self.positions[filepath])
-                            new_lines = fh.read(10000)  # Max 10KB per read
+                            new_lines = fh.read(10000)
                             self.positions[filepath] = fh.tell()
                         if new_lines.strip():
+                            # Deduplicate
+                            msg_hash = hash(new_lines.strip()[:300])
+                            if msg_hash in self._recent_hashes:
+                                continue
+                            self._recent_hashes.append(msg_hash)
                             level = "info"
                             lower = new_lines.lower()
                             if any(kw in lower for kw in ["error", "exception", "traceback", "failed", "critical"]):
@@ -722,38 +702,35 @@ class LogFileWatcher(threading.Thread):
                                 "platform": PLATFORM,
                                 "version": VERSION,
                                 "level": level,
-                                "message": f"[file:{os.path.basename(filepath)}] {new_lines.strip()[:2000]}",
+                                "message": "[file:{}] {}".format(os.path.basename(filepath), new_lines.strip()[:2000]),
                                 "context": {"capturedFrom": "file_watcher", "file": filepath},
                             })
                     elif size < self.positions.get(filepath, 0):
-                        # File was truncated/rotated
                         self.positions[filepath] = 0
                 except Exception:
                     pass
             time.sleep(2)
-# --- Collect Everything ---
+# =============================================================================
+# Collect Metrics
+# =============================================================================
-def collect_full_metrics() -> dict:
-    """Collect absolutely everything."""
+def collect_full_metrics():
     cpu = psutil.cpu_percent(interval=1)
     cpu_per_core = psutil.cpu_percent(interval=0, percpu=True)
     mem = psutil.virtual_memory()
     swap = psutil.swap_memory()
     data = {
         "cpu": cpu,
         "memory": mem.percent,
         "network_latency": 0,
         "custom": {
-            # CPU deep
             "cpu_per_core": cpu_per_core,
             "cpu_count_logical": psutil.cpu_count(),
             "cpu_count_physical": psutil.cpu_count(logical=False),
             "load_avg": list(os.getloadavg()) if hasattr(os, "getloadavg") else [],
-            # Memory deep
             "memory_used_gb": round(mem.used / 1e9, 2),
             "memory_total_gb": round(mem.total / 1e9, 2),
             "memory_available_gb": round(mem.available / 1e9, 2),
@@ -762,12 +739,8 @@ def collect_full_metrics() -> dict:
             "swap_used_gb": round(swap.used / 1e9, 2),
             "swap_total_gb": round(swap.total / 1e9, 2),
             "swap_pct": swap.percent,
-            # Disk I/O
             "disk_io": get_disk_io(),
-            # OS info
-            "os": f"{os.uname().sysname} {os.uname().release}" if hasattr(os, "uname") else "unknown",
+            "os": "{} {}".format(os.uname().sysname, os.uname().release) if hasattr(os, "uname") else "unknown",
             "hostname": socket.gethostname(),
             "python_version": sys.version.split()[0],
             "node_version": run_cmd("node --version 2>/dev/null"),
@@ -775,7 +748,6 @@ def collect_full_metrics() -> dict:
         },
     }
-    # CPU frequency
     try:
         freq = psutil.cpu_freq()
         if freq:
@@ -784,7 +756,6 @@ def collect_full_metrics() -> dict:
     except Exception:
         pass
-    # Context switches
     try:
         ctx = psutil.cpu_stats()
         data["custom"]["ctx_switches"] = ctx.ctx_switches
@@ -792,7 +763,6 @@ def collect_full_metrics() -> dict:
     except Exception:
         pass
-    # GPU
     gpu = get_gpu_metrics()
     if gpu:
         data["gpu_temp"] = gpu.get("gpu_temp")
@@ -802,8 +772,7 @@ def collect_full_metrics() -> dict:
     return data
-def collect_deep_snapshot() -> dict:
-    """Full system snapshot for variable inspector."""
+def collect_deep_snapshot():
     snapshot = {
         "system": {
             "hostname": socket.gethostname(),
@@ -811,7 +780,7 @@ def collect_deep_snapshot() -> dict:
             "cpu_count": psutil.cpu_count(),
             "memory_total_gb": round(psutil.virtual_memory().total / 1e9, 2),
             "python_version": sys.version.split()[0],
-            "os": f"{os.uname().sysname} {os.uname().release}" if hasattr(os, "uname") else "unknown",
+            "os": "{} {}".format(os.uname().sysname, os.uname().release) if hasattr(os, "uname") else "unknown",
             "kernel": run_cmd("uname -r 2>/dev/null"),
         },
         "gpu": get_gpu_metrics() if GPU_AVAILABLE else {"available": False},
@@ -825,28 +794,23 @@ def collect_deep_snapshot() -> dict:
         "firewall": get_firewall_rules(),
         "environment": get_env_sanitized(),
     }
-    # Job state
     state = job_tracker.get_state()
     if state:
         snapshot["current_job"] = state
     return snapshot
-# --- Job State Tracking ---
+# =============================================================================
+# Job State Tracking
+# =============================================================================
 class JobTracker:
-    """Track job states from BullMQ or similar queue systems."""
     def __init__(self):
         self.current_job = None
         self.job_history = deque(maxlen=50)
         self.lock = threading.Lock()
-    def update(self, job_id: str, status: str, progress: float = 0,
-               last_action: str = "", error: str | None = None,
-               metadata: dict | None = None):
+    def update(self, job_id, status, progress=0, last_action="", error=None, metadata=None):
         with self.lock:
             job = {
                 "job_id": job_id,
@@ -860,11 +824,11 @@ class JobTracker:
             self.current_job = job
             self.job_history.append(job)
-    def get_state(self) -> dict | None:
+    def get_state(self):
         with self.lock:
             return self.current_job.copy() if self.current_job else None
-    def get_history(self, n: int = 20) -> list:
+    def get_history(self, n=20):
         with self.lock:
             return list(self.job_history)[-n:]
@@ -881,14 +845,16 @@ class JobTracker:
         })
-# --- Public API for External Use ---
+# =============================================================================
+# Public API
+# =============================================================================
 job_tracker = JobTracker()
 stdout_capture = None
 stderr_capture = None
-def send_log(level: str, message: str, context: dict = None):
+def send_log(level, message, context=None):
     send({
         "type": "log",
         "source": SOURCE_NAME,
@@ -900,7 +866,7 @@ def send_log(level: str, message: str, context: dict = None):
     })
-def send_error(title: str, stack_trace: str = "", context: dict = None):
+def send_error(title, stack_trace="", context=None):
     send({
         "type": "error",
         "source": SOURCE_NAME,
@@ -912,74 +878,236 @@ def send_error(title: str, stack_trace: str = "", context: dict = None):
     })
-def update_job(job_id: str, status: str, progress: float = 0,
-               last_action: str = "", error: str = None, metadata: dict = None):
+def update_job(job_id, status, progress=0, last_action="", error=None, metadata=None):
+    """Record a job state change, publish it, and report an error when one is given.
+    All six arguments are passed unchanged to job_tracker.update(), after which
+    job_tracker.send_update() is called. When error is truthy, send_error() is
+    also called with the title "Job <job_id> failed: <error>".
+    """
     job_tracker.update(job_id, status, progress, last_action, error, metadata)
     job_tracker.send_update()
     if error:
-        send_error(f"Job {job_id} failed: {error}", context={
+        # metadata is unpacked last, so a key such as "status" inside it
+        # overrides the explicit entry of the same name in the error context.
+        send_error("Job {} failed: {}".format(job_id, error), context={
             "job_id": job_id, "status": status, "last_action": last_action,
             **(metadata or {}),
         })
-def capture_image_gen(job_id: str, model: str, params: dict,
-                      result: dict = None, error: str = None,
-                      duration_s: float = None):
-    """Specialized capture for image generation pipelines."""
-    context = {
-        "job_id": job_id,
-        "model": model,
-        "params": params,
-        "duration_s": duration_s,
-    }
+def capture_image_gen(job_id, model, params, result=None, error=None, duration_s=None):
+    """Report the outcome of one image-generation run and update its job state.
+    A truthy result is logged at "info" level; a truthy error is sent through
+    send_error(). The two checks are independent, so both run when both
+    arguments are truthy. Finally update_job() is called with status
+    "completed" and progress 100 when result is truthy, otherwise "failed"
+    and progress 0.
+    NOTE(review): error is also forwarded to update_job(), which itself calls
+    send_error() for a truthy error, so a failure appears to be reported
+    twice - confirm this is intended.
+    """
+    context = {"job_id": job_id, "model": model, "params": params, "duration_s": duration_s}
     if result:
         context["result"] = result
-        send_log("info", f"Image gen complete: {model} ({duration_s:.1f}s)", context)
+        # "or 0" keeps the {:.1f} format from failing when duration_s is None.
+        send_log("info", "Image gen complete: {} ({:.1f}s)".format(model, duration_s or 0), context)
     if error:
         context["error"] = error
-        send_error(f"Image gen failed: {model} — {error}", context=context)
+        send_error("Image gen failed: {} — {}".format(model, error), context=context)
     update_job(job_id, "completed" if result else "failed",
                progress=100 if result else 0,
-               last_action=f"generate:{model}",
-               error=error,
-               metadata=context)
+               last_action="generate:{}".format(model),
+               error=error, metadata=context)
+# =============================================================================
+# Action-Based Command Execution
+# =============================================================================
+def execute_action(action_id, timeout=30):
+    """Run the command registered for *action_id* and summarize the outcome.
+    Args:
+        action_id: Key looked up in ACTION_COMMANDS.
+        timeout: Seconds passed to subprocess.run() as its timeout.
+    Returns:
+        A dict with three keys. "output" is stdout, followed by stderr when
+        present, stripped and capped at 50000 characters, or a bracketed
+        diagnostic when nothing ran to completion. "exit_code" is the
+        process return code, or -1 for an unknown action, a timeout, or any
+        other failure. "duration_ms" is the elapsed time in whole
+        milliseconds, or 0 for an unknown action. No exception escapes.
+    """
+    def _summary(output, exit_code, duration_ms):
+        # Single place that fixes the shape of the returned dict.
+        return {"output": output, "exit_code": exit_code, "duration_ms": duration_ms}
+    def _elapsed_ms(started):
+        return int((time.monotonic() - started) * 1000)
+    cmd = ACTION_COMMANDS.get(action_id)
+    if not cmd:
+        return _summary("[Unknown action: {}]".format(action_id), -1, 0)
+    # Monotonic clock: a wall-clock adjustment while the command runs must not
+    # skew (or make negative) the reported duration.
+    start = time.monotonic()
+    try:
+        # The command text comes from the ACTION_COMMANDS table, never from
+        # the caller's input. NOTE(review): shell=True presumes every entry
+        # in that table is trusted - confirm where the table is defined.
+        result = subprocess.run(
+            cmd, shell=True, capture_output=True, text=True, timeout=timeout
+        )
+        duration_ms = _elapsed_ms(start)
+        output = result.stdout
+        if result.stderr:
+            output += ("\n--- stderr ---\n" + result.stderr) if output else result.stderr
+        return _summary(output.strip()[:50000], result.returncode, duration_ms)
+    except subprocess.TimeoutExpired:
+        return _summary("[Action timed out after {}s]".format(timeout), -1, _elapsed_ms(start))
+    except Exception as e:
+        # Best-effort boundary: any failure becomes a result dict, not a raise.
+        return _summary("[Execution error: {}]".format(e), -1, _elapsed_ms(start))
-# --- Main Loop ---
+def _post_command_result(cmd_id, output, exit_code, duration_ms):
+    """POST one command's outcome to POLL_COMMANDS_URL as a "result" message."""
+    session.post(
+        POLL_COMMANDS_URL,
+        json={
+            "action": "result",
+            "command_id": cmd_id,
+            "output": output,
+            "exit_code": exit_code,
+            "duration_ms": duration_ms,
+            "source_name": SOURCE_NAME,
+        },
+        headers=HEADERS,
+        timeout=10,
+    )
+def poll_and_execute_commands():
+    """Poll POLL_COMMANDS_URL for pending commands and run the permitted ones.
+    Does nothing when POLL_COMMANDS_URL is falsy or the poll response status
+    is not 200. Each returned command must be a string of the form
+    "ACTION:<id>" whose <id> is a key of ACTION_COMMANDS; anything else is
+    answered with a "[REJECTED]" result and exit code -2. Accepted commands
+    are claimed, run through execute_action() with a timeout capped at 60
+    seconds, reported back, and logged through send_log().
+    Any exception is logged at debug level and ends the current poll; nothing
+    is raised to the caller.
+    """
+    if not POLL_COMMANDS_URL:
+        return
+    prefix = "ACTION:"
+    try:
+        resp = session.post(
+            POLL_COMMANDS_URL,
+            json={"action": "poll", "source_name": SOURCE_NAME},
+            headers=HEADERS,
+            timeout=10,
+        )
+        if resp.status_code != 200:
+            return
+        data = resp.json()
+        # "or" also covers an explicit null, matching the settings lookup.
+        commands = data.get("commands") or []
+        settings = data.get("settings") or {}
+        max_timeout = settings.get("max_timeout_s", 30)
+        if not isinstance(max_timeout, (int, float)):
+            # A null or non-numeric setting would make min() raise only after
+            # the command had been claimed, leaving it without a result.
+            max_timeout = 30
+        for cmd_entry in commands:
+            cmd_id = cmd_entry["id"]
+            cmd_str = cmd_entry["command"]
+            # Only accept ACTION: prefixed commands
+            if not isinstance(cmd_str, str) or not cmd_str.startswith(prefix):
+                logger.warning("Rejected non-action command: %s", str(cmd_str)[:50])
+                _post_command_result(
+                    cmd_id,
+                    "[REJECTED] Only predefined actions are allowed. Raw commands are disabled.",
+                    -2,
+                    0,
+                )
+                continue
+            # Drop only the leading prefix; str.replace() would also delete
+            # any later "ACTION:" inside the identifier.
+            action_id = cmd_str[len(prefix):]
+            if action_id not in ACTION_COMMANDS:
+                logger.warning("Unknown action: %s", action_id)
+                _post_command_result(
+                    cmd_id, "[REJECTED] Unknown action: {}".format(action_id), -2, 0
+                )
+                continue
+            # str() keeps a non-string id from raising in the slice.
+            logger.info("Executing action: %s (id=%s...)", action_id, str(cmd_id)[:8])
+            # Claim
+            session.post(
+                POLL_COMMANDS_URL,
+                json={"action": "claim", "command_id": cmd_id},
+                headers=HEADERS,
+                timeout=10,
+            )
+            # Execute the mapped command
+            result = execute_action(action_id, timeout=min(max_timeout, 60))
+            logger.info("Action %s completed: exit_code=%s (%sms)", action_id, result["exit_code"], result["duration_ms"])
+            # Report result
+            _post_command_result(
+                cmd_id, result["output"], result["exit_code"], result["duration_ms"]
+            )
+            # Log execution
+            send_log(
+                "info" if result["exit_code"] == 0 else "warn",
+                "[action] {} -> exit {} ({}ms)".format(action_id, result["exit_code"], result["duration_ms"]),
+                {"action": action_id, "exit_code": result["exit_code"], "command_id": cmd_id},
+            )
+    except Exception as e:
+        # Deliberate best-effort: a failed poll must not stop the caller.
+        logger.debug("Command poll error: %s", e)
+# =============================================================================
+# PM2 Log Tracker (prevents re-sending same lines)
+# =============================================================================
+class PM2LogTracker:
+    """Remembers error-looking PM2 log lines so each one is returned only once."""
+    # Substrings, matched against the lower-cased line, that mark it as an error.
+    _ERROR_MARKERS = ("error", "exception", "failed", "crash", "enoent", "eacces", "killed")
+    def __init__(self):
+        # Bounded memory: once 1000 fingerprints are held, the oldest is dropped.
+        self._seen_hashes = deque(maxlen=1000)
+    def get_new_errors(self, pm2_output):
+        """Return the error lines of *pm2_output* not returned by an earlier call.
+        A line counts as an error when its lower-cased text contains one of the
+        marker substrings. Lines are compared by the hash of the first 200
+        characters of their stripped text; each returned line is stripped and
+        cut to 500 characters.
+        """
+        fresh = []
+        for raw in pm2_output.split("\n"):
+            lowered = raw.lower()
+            if not any(marker in lowered for marker in self._ERROR_MARKERS):
+                continue
+            stripped = raw.strip()
+            fingerprint = hash(stripped[:200])
+            if fingerprint in self._seen_hashes:
+                continue
+            self._seen_hashes.append(fingerprint)
+            fresh.append(stripped[:500])
+        return fresh
+pm2_log_tracker = PM2LogTracker()
+# =============================================================================
+# Main Loop
+# =============================================================================
 def main():
     global stdout_capture, stderr_capture
-    # Capture stdout/stderr
     stdout_capture = StreamCapture(sys.stdout, "info")
     stderr_capture = StreamCapture(sys.stderr, "error")
     sys.stdout = stdout_capture
     sys.stderr = stderr_capture
-    logger.info(f"debugger.help Agent v{VERSION} — Ultimate Deep Debugger")
-    logger.info(f"Source: {SOURCE_NAME} | GPU: {'yes' if GPU_AVAILABLE else 'no'} | Docker: {'yes' if DOCKER_AVAILABLE else 'no'}")
-    logger.info(f"Interval: {INTERVAL}s | Endpoint: {INGEST_URL}")
+    logger.info("debugger.help Agent v%s — Action-Based Execution", VERSION)
+    logger.info("Source: %s | GPU: %s | Docker: %s", SOURCE_NAME, "yes" if GPU_AVAILABLE else "no", "yes" if DOCKER_AVAILABLE else "no")
+    logger.info("Interval: %ss | Endpoint: %s", INTERVAL, INGEST_URL)
+    logger.info("Registered actions: %s", ", ".join(sorted(ACTION_COMMANDS.keys())))
-    # Start file watchers
+    # Start file watchers — exclude agent's own log files
     watch_files = list(WATCH_LOG_FILES)
-    # Auto-discover pm2 log files
     pm2_log_dir = os.path.expanduser("~/.pm2/logs")
     if os.path.isdir(pm2_log_dir):
         pm2_logs = glob.glob(os.path.join(pm2_log_dir, "*.log"))
+        # Exclude the debugger agent's own log files to prevent feedback loop
+        pm2_logs = [f for f in pm2_logs if "debugger-agent" not in os.path.basename(f).lower()
+                    and "debugger_agent" not in os.path.basename(f).lower()]
         watch_files.extend(pm2_logs)
-        logger.info(f"Watching {len(pm2_logs)} pm2 log files")
+        logger.info("Watching %d pm2 log files (excluded agent logs)", len(pm2_logs))
     if watch_files:
         watcher = LogFileWatcher(watch_files)
         watcher.start()
-        logger.info(f"File watcher started for {len(watch_files)} files")
+        logger.info("File watcher started for %d files", len(watch_files))
     # Initial connection
     send({"type": "heartbeat", "source": SOURCE_NAME, "platform": PLATFORM, "version": VERSION})
-    send_log("info", f"Agent v{VERSION} started on {socket.gethostname()}", {
+    send_log("info", "Agent v{} started on {}".format(VERSION, socket.gethostname()), {
         "hostname": socket.gethostname(),
         "python_version": sys.version,
         "gpu_available": GPU_AVAILABLE,
@@ -987,9 +1115,10 @@ def main():
         "pid": os.getpid(),
         "gpu_driver": GPU_DRIVER_VERSION,
         "cuda_version": GPU_CUDA_VERSION,
+        "available_actions": list(ACTION_COMMANDS.keys()),
     })
-    # Send initial deep snapshot with packages
+    # Send initial deep snapshot
     try:
         pkgs = get_installed_packages()
         send({
@@ -1022,10 +1151,9 @@ def main():
             else:
                 consecutive_failures += 1
-            # Auto-reconnect backoff
             if consecutive_failures > 5:
                 backoff = min(consecutive_failures * 5, 60)
-                logger.warning(f"Connection issues. Backing off {backoff}s...")
+                logger.warning("Connection issues. Backing off %ss...", backoff)
                 time.sleep(backoff)
                 send({"type": "heartbeat", "source": SOURCE_NAME, "platform": PLATFORM, "version": VERSION})
                 consecutive_failures = 0
@@ -1042,15 +1170,13 @@ def main():
                     "variables": snapshot,
                 })
-            # PM2 logs check every ~30 seconds
+            # PM2 logs check every ~30s (with deduplication)
             if tick % 3 == 0:
                 pm2_logs = get_pm2_logs(50)
                 if pm2_logs:
-                    # Check for errors in pm2 logs
-                    for line in pm2_logs.split("\n"):
-                        lower = line.lower()
-                        if any(kw in lower for kw in ["error", "exception", "failed", "crash", "enoent", "eacces", "killed"]):
-                            send_log("error", f"[pm2] {line.strip()[:500]}", {"capturedFrom": "pm2_logs"})
+                    new_errors = pm2_log_tracker.get_new_errors(pm2_logs)
+                    for line in new_errors[:5]:  # Max 5 new errors per check
+                        send_log("error", "[pm2] {}".format(line), {"capturedFrom": "pm2_logs"})
             # System logs check every ~2 min
             if tick % 12 == 0:
@@ -1070,8 +1196,8 @@ def main():
                 for r in ssl_results:
                     if r.get("warning") or r.get("error"):
                         days = r.get("days_left")
-                        detail = r.get("error") or f"{days} days left"
-                        send_log("warn", f"SSL cert issue: {r.get('domain')} — {detail}", r)
+                        detail = r.get("error") or "{} days left".format(days)
+                        send_log("warn", "SSL cert issue: {} — {}".format(r.get("domain"), detail), r)
                 send({
                     "type": "inspect",
                     "source": SOURCE_NAME,
@@ -1080,18 +1206,19 @@ def main():
                     "variables": {"ssl_certs": ssl_results},
                 })
-            # GPU warnings every ~30 seconds
+            # GPU warnings every ~30s (only when thresholds exceeded)
             if GPU_AVAILABLE and tick % 3 == 0:
                 gpu = get_gpu_metrics()
                 gpus = gpu.get("gpus", [])
                 for g in gpus:
                     if g.get("temp_c", 0) > 85:
-                        send_log("warn", f"GPU {g['index']} temperature critical: {g['temp_c']}°C", g)
+                        send_log("warn", "GPU {} temperature critical: {}C".format(g["index"], g["temp_c"]), g)
                     if g.get("vram_pct", 0) > 90:
-                        send_log("warn", f"GPU {g['index']} VRAM critical: {g['vram_pct']}% ({g['vram_used_gb']}/{g['vram_total_gb']} GB)", g)
+                        send_log("warn", "GPU {} VRAM critical: {}% ({}/{} GB)".format(
+                            g["index"], g["vram_pct"], g["vram_used_gb"], g["vram_total_gb"]), g)
                     throttle = g.get("throttle_reasons", [])
                     if throttle and throttle != ["none"] and throttle != ["idle"]:
-                        send_log("warn", f"GPU {g['index']} throttling: {', '.join(throttle)}", g)
+                        send_log("warn", "GPU {} throttling: {}".format(g["index"], ", ".join(throttle)), g)
             # Heartbeat every ~5 min
             if tick % 30 == 0 and tick > 0:
@@ -1108,6 +1235,15 @@ def main():
                     "variables": {"installed_packages": pkgs},
                 })
+            # Poll for remote actions every tick
+            poll_and_execute_commands()
+            # Flush captured stdout/stderr
+            if stdout_capture:
+                stdout_capture.flush_pending()
+            if stderr_capture:
+                stderr_capture.flush_pending()
             tick += 1
             time.sleep(INTERVAL)
@@ -1118,7 +1254,7 @@ def main():
             sys.stderr = stderr_capture.original
             break
         except Exception as e:
-            logger.error(f"Main loop error: {e}")
+            logger.error("Main loop error: %s", e)
             send_error(str(e), traceback.format_exc())
             time.sleep(INTERVAL)

{debugger_help-3.0.2 → debugger_help-4.0.0}/debugger_help.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: debugger-help
-Version: 3.0.2
+Version: 4.0.0
 Summary: debugger.help VPS Agent — Deep system monitoring for logs, GPU, PM2, Docker, and more
 Author: debugger.help
 License: MIT

{debugger_help-3.0.2 → debugger_help-4.0.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "debugger-help"
-version = "3.0.2"
+version = "4.0.0"
 description = "debugger.help VPS Agent — Deep system monitoring for logs, GPU, PM2, Docker, and more"
 readme = "README.md"
 license = {text = "MIT"}

{debugger_help-3.0.2 → debugger_help-4.0.0}/README.md RENAMED Viewed

File without changes

{debugger_help-3.0.2 → debugger_help-4.0.0}/debugger_help.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{debugger_help-3.0.2 → debugger_help-4.0.0}/debugger_help.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{debugger_help-3.0.2 → debugger_help-4.0.0}/debugger_help.egg-info/entry_points.txt RENAMED Viewed

File without changes

{debugger_help-3.0.2 → debugger_help-4.0.0}/debugger_help.egg-info/requires.txt RENAMED Viewed

File without changes

{debugger_help-3.0.2 → debugger_help-4.0.0}/debugger_help.egg-info/top_level.txt RENAMED Viewed

File without changes

{debugger_help-3.0.2 → debugger_help-4.0.0}/setup.cfg RENAMED Viewed

File without changes

debugger-help 3.0.2__tar.gz → 4.0.0__tar.gz

debugger-help 3.0.2tar.gz → 4.0.0tar.gz