debugger-help 4.1.1__tar.gz → 4.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: debugger-help
3
- Version: 4.1.1
3
+ Version: 4.2.0
4
4
  Summary: debugger.help VPS Agent — Deep system monitoring for logs, GPU, PM2, Docker, and more
5
5
  Author: debugger.help
6
6
  License: MIT
@@ -1,11 +1,12 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- debugger.help VPS Agent v4.0.0 — Action-Based Execution
3
+ debugger.help VPS Agent v4.2.0 — Action-Based Execution + Security Engine
4
4
 
5
- Key changes from v3:
6
- - Action-based command system: only predefined actions execute, no raw commands
7
- - Fixed error/warning log flood: deduplication + excluded agent's own logs
8
- - Deterministic execution: AI selects from action registry, agent maps to commands
5
+ Key changes from v4.1:
6
+ - Security engine: process classification, threat scoring, persistence scanning
7
+ - New security actions: kill_process, quarantine_path, disable_service, etc.
8
+ - Incident detection with auto-reporting
9
+ - Cost estimation for resource theft
9
10
  """
10
11
 
11
12
  import os
@@ -78,7 +79,7 @@ INGEST_URL = os.environ.get("DEBUGGER_INGEST_URL", "")
78
79
  SOURCE_NAME = os.environ.get("DEBUGGER_SOURCE", "vps-{}".format(socket.gethostname()))
79
80
  PLATFORM = os.environ.get("DEBUGGER_PLATFORM", "Python (VPS)")
80
81
  INTERVAL = int(os.environ.get("DEBUGGER_INTERVAL", "10"))
81
- VERSION = "4.1.0"
82
+ VERSION = "4.2.0"
82
83
 
83
84
  # Derive poll-commands URL from ingest URL
84
85
  POLL_COMMANDS_URL = INGEST_URL.replace("/ingest", "/poll-commands") if INGEST_URL else ""
@@ -195,7 +196,18 @@ COMFY_WORKFLOW_DIRS = [
195
196
  ]
196
197
 
197
198
  # Actions that are never auto-executed even if the server says so
198
- DANGEROUS_ACTIONS = {"restart_docker", "restart_comfy"}
199
+ DANGEROUS_ACTIONS = {
200
+ "restart_docker", "restart_comfy",
201
+ "kill_process", "quarantine_path", "disable_service",
202
+ "remove_cron", "restart_service", "block_outbound",
203
+ }
204
+
205
+ # Security action IDs — handled by the security module
206
+ SECURITY_ACTIONS = {
207
+ "kill_process", "quarantine_path", "disable_service",
208
+ "remove_cron", "restart_service", "block_outbound",
209
+ "list_connections", "persistence_scan", "security_scan",
210
+ }
199
211
 
200
212
 
201
213
  # =============================================================================
@@ -839,6 +851,27 @@ def collect_deep_snapshot():
839
851
  state = job_tracker.get_state()
840
852
  if state:
841
853
  snapshot["current_job"] = state
854
+
855
+ # Security classification (runs every deep snapshot)
856
+ try:
857
+ from debugger_help.security import classify_processes, compute_threat_score, estimate_cost_impact
858
+ classification = classify_processes()
859
+ gpu_metrics = snapshot.get("gpu") if GPU_AVAILABLE else None
860
+ threat = compute_threat_score(classification, gpu_metrics)
861
+ cost = estimate_cost_impact(gpu_metrics, classification)
862
+ snapshot["security"] = {
863
+ "threat_score": threat,
864
+ "process_classification": {
865
+ "summary": classification.get("summary"),
866
+ "malicious": classification.get("malicious", []),
867
+ "suspicious": classification.get("suspicious", [])[:20],
868
+ },
869
+ }
870
+ if cost:
871
+ snapshot["security"]["cost_impact"] = cost
872
+ except Exception as e:
873
+ snapshot["security"] = {"error": str(e)}
874
+
842
875
  return snapshot
843
876
 
844
877
 
@@ -1059,6 +1092,10 @@ def execute_action(action_id, timeout=30, payload=None):
1059
1092
  if action_id in COMFY_API_ACTIONS:
1060
1093
  return execute_comfy_api_action(action_id, payload=payload, timeout=timeout)
1061
1094
 
1095
+ # Handle security actions via security module
1096
+ if action_id in SECURITY_ACTIONS:
1097
+ return execute_security_action(action_id, payload=payload, timeout=timeout)
1098
+
1062
1099
  cmd = ACTION_COMMANDS.get(action_id)
1063
1100
  if not cmd:
1064
1101
  return {
@@ -1097,6 +1134,59 @@ def execute_action(action_id, timeout=30, payload=None):
1097
1134
  }
1098
1135
 
1099
1136
 
1137
def execute_security_action(action_id, payload=None, timeout=30):
    """Run a security action through the security module and wrap its result.

    Args:
        action_id: Identifier of the security action to run.
        payload: Optional dict forwarded to the handlers that accept one.
        timeout: Kept for signature parity with the other action executors;
            it is not forwarded to the security handlers.

    Returns:
        dict with three keys: "output" (a JSON string carrying the handler's
        "output" and "verification" values, truncated to 50000 characters),
        "exit_code" (0 if the handler reported success, 1 if it did not, -1 if
        anything raised) and "duration_ms" (elapsed wall-clock milliseconds).
    """
    start = time.time()
    try:
        # Imported inside the try block so that a missing or broken security
        # module is reported as a failed action instead of raising to the caller.
        from debugger_help.security import (
            action_kill_process, action_quarantine_path, action_disable_service,
            action_remove_cron, action_restart_service, action_block_outbound,
            action_list_connections, persistence_scan as run_persistence_scan,
            action_security_scan,
        )

        def _persistence_scan():
            # The scanner returns raw findings; wrap them in the handler shape.
            scan = run_persistence_scan()
            return {"success": True, "output": json.dumps(scan, indent=2)}

        def _security_scan():
            gpu = get_gpu_metrics() if GPU_AVAILABLE else None
            return action_security_scan(gpu)

        handlers = {
            "kill_process": lambda: action_kill_process(payload),
            "quarantine_path": lambda: action_quarantine_path(payload),
            "disable_service": lambda: action_disable_service(payload),
            "remove_cron": lambda: action_remove_cron(payload),
            "restart_service": lambda: action_restart_service(payload),
            "block_outbound": lambda: action_block_outbound(payload),
            "list_connections": action_list_connections,
            "persistence_scan": _persistence_scan,
            "security_scan": _security_scan,
        }

        handler = handlers.get(action_id)
        if handler is None:
            result = {"success": False, "output": "[Unknown security action: {}]".format(action_id)}
        else:
            result = handler()

        duration_ms = int((time.time() - start) * 1000)
        structured_output = json.dumps({
            "output": result.get("output", ""),
            "verification": result.get("verification"),
        })
        return {
            "output": structured_output[:50000],
            "exit_code": 0 if result.get("success") else 1,
            "duration_ms": duration_ms,
        }
    except Exception as e:
        # Boundary handler: every failure is reported back as a result dict.
        duration_ms = int((time.time() - start) * 1000)
        return {
            "output": "[Security action error: {}]".format(e),
            "exit_code": -1,
            "duration_ms": duration_ms,
        }
1188
+
1189
+
1100
1190
  def poll_and_execute_commands():
1101
1191
  """Poll for pending actions and execute them."""
1102
1192
  if not POLL_COMMANDS_URL:
@@ -1150,7 +1240,7 @@ def poll_and_execute_commands():
1150
1240
  except (json.JSONDecodeError, IndexError):
1151
1241
  pass
1152
1242
 
1153
- if action_id not in ACTION_COMMANDS and action_id not in COMFY_API_ACTIONS:
1243
+ if action_id not in ACTION_COMMANDS and action_id not in COMFY_API_ACTIONS and action_id not in SECURITY_ACTIONS:
1154
1244
  logger.warning("Unknown action: %s", action_id)
1155
1245
  session.post(
1156
1246
  POLL_COMMANDS_URL,
@@ -1247,7 +1337,7 @@ def main():
1247
1337
  logger.info("debugger.help Agent v%s — Action-Based Execution", VERSION)
1248
1338
  logger.info("Source: %s | GPU: %s | Docker: %s", SOURCE_NAME, "yes" if GPU_AVAILABLE else "no", "yes" if DOCKER_AVAILABLE else "no")
1249
1339
  logger.info("Interval: %ss | Endpoint: %s", INTERVAL, INGEST_URL)
1250
- logger.info("Registered actions: %s", ", ".join(sorted(ACTION_COMMANDS.keys())))
1340
+ logger.info("Registered actions: %s", ", ".join(sorted(list(ACTION_COMMANDS.keys()) + list(SECURITY_ACTIONS))))
1251
1341
 
1252
1342
  # Start file watchers — exclude agent's own log files
1253
1343
  watch_files = list(WATCH_LOG_FILES)
@@ -1330,6 +1420,20 @@ def main():
1330
1420
  "variables": snapshot,
1331
1421
  })
1332
1422
 
1423
+ # Auto-report security incidents when threat score > 50
1424
+ security = snapshot.get("security", {})
1425
+ threat = security.get("threat_score", {})
1426
+ if isinstance(threat, dict) and threat.get("score", 0) > 50:
1427
+ malicious = security.get("process_classification", {}).get("malicious", [])
1428
+ send_log("error", "[SECURITY INCIDENT] Threat score: {} — {} malicious process(es) detected".format(
1429
+ threat["score"], len(malicious)
1430
+ ), {
1431
+ "type": "security_incident",
1432
+ "threat_score": threat,
1433
+ "malicious_processes": malicious[:5],
1434
+ "cost_impact": security.get("cost_impact"),
1435
+ })
1436
+
1333
1437
  # PM2 logs check every ~30s (with deduplication)
1334
1438
  if tick % 3 == 0:
1335
1439
  pm2_logs = get_pm2_logs(50)
@@ -0,0 +1,785 @@
1
+ """
2
+ Security Engine — Process classification, threat scoring, persistence scanning.
3
+ Part of the debugger.help incident response system.
4
+ """
5
+
6
+ import os
7
+ import re
8
+ import json
9
+ import time
10
+ import shutil
11
+ import signal
12
+ import subprocess
13
+ from datetime import datetime, timezone
14
+
15
+ try:
16
+ import psutil
17
+ except ImportError:
18
+ psutil = None
19
+
20
+
21
+ # =============================================================================
22
+ # Process Classification
23
+ # =============================================================================
24
+
25
# Process names considered expected on a managed host. Names are listed
# space-separated per category and split into a set.
ALLOWED_PROCESSES = set((
    # System
    "systemd init sshd cron crond bash sh zsh login "
    "su sudo rsyslogd journald udevd dbus-daemon polkitd "
    "networkd resolved timesyncd agetty getty "
    # Docker
    "dockerd containerd containerd-shim runc docker-proxy "
    # Node / PM2
    "node npm npx pm2 PM2 yarn pnpm bun "
    # Python
    "python python3 python3.10 python3.11 python3.12 pip pip3 "
    # GPU / CUDA
    "nvidia-smi nvidia-persistenced nvidia-cuda-mps "
    # Our services
    "yourstudio comfyui ComfyUI llama llama-server "
    "chatterbox debugger-agent debugger_agent "
    # Common system tools
    "top htop ps grep find cat less tail head "
    "curl wget git make gcc g++ cc ld "
    "tar gzip zip unzip rsync scp sftp "
    "screen tmux vim nano vi "
    # Monitoring
    "prometheus grafana telegraf collectd "
    # Web servers
    "nginx apache2 httpd caddy "
    # Databases
    "postgres postgresql mysql mysqld redis-server mongod"
).split())

# Case-insensitive patterns searched for in command lines, cron entries and
# service files; any match marks the text as a blocked signature.
BLOCKED_SIGNATURES = [
    re.compile(source, re.IGNORECASE)
    for source in (
        # Mining software
        r"xmrig",
        r"rigel",
        r"monero",
        r"kryptex",
        r"stratum\+",
        r"ethminer",
        r"phoenixminer",
        r"t-rex",
        r"nbminer",
        r"gminer",
        r"lolminer",
        r"claymore",
        r"bfgminer",
        r"cgminer",
        r"cpuminer",
        r"minerd",
        r"minergate",
        # Mining arguments
        r"--coin\s+monero",
        r"--algo\s+random",
        r"--donate-level",
        r"pool\.(minergate|hashvault|nanopool|supportxmr|herominers)",
        # Hidden path patterns (executables in dotdirs)
        # NOTE(review): the first pattern below also matches paths such as
        # ~/.local/bin/ and ~/.local/lib/. classify_processes checks blocked
        # signatures before its hidden-directory exemptions, so such command
        # lines are classified as malicious — confirm this is intended.
        r"/\.[a-z]+/(bin|lib|tmp)/",
        r"\.cache/[a-z]+miner",
        r"\.dbus/sessions?/[a-z0-9]+",
        r"\.local/share/[a-z]+min",
    )
]

# Mining pool ports
SUSPICIOUS_PORTS = {3333, 4444, 5555, 10128, 13333, 14433, 14444, 45700}
87
+
88
+
89
+ def _is_allowed_process(name, cmdline):
90
+ """Check if a process name or its base binary is in the allowed list."""
91
+ if not name:
92
+ return False
93
+ name_lower = name.lower()
94
+ for allowed in ALLOWED_PROCESSES:
95
+ if name_lower == allowed.lower():
96
+ return True
97
+ # Check if the binary in cmdline is allowed
98
+ if cmdline:
99
+ binary = os.path.basename(cmdline[0]) if cmdline else ""
100
+ for allowed in ALLOWED_PROCESSES:
101
+ if binary.lower() == allowed.lower():
102
+ return True
103
+ return False
104
+
105
+
106
def _check_blocked(cmdline_str):
    """Return the source patterns of all blocked signatures found in the text.

    The result keeps the order of BLOCKED_SIGNATURES and is empty when
    nothing matches.
    """
    return [
        signature.pattern
        for signature in BLOCKED_SIGNATURES
        if signature.search(cmdline_str)
    ]
113
+
114
+
115
def classify_processes():
    """Sort every running process into allowed, suspicious, or malicious.

    Each process is checked in this order: blocked signatures against its
    command line (malicious, confidence 95), the hidden-directory heuristic
    against its executable path (suspicious, confidence 60), then the allowed
    list (allowed, confidence 100). Anything left is suspicious with
    confidence 30.

    Returns:
        dict with "allowed", "suspicious" and "malicious" lists of process
        entries and a "summary" dict of counts. When psutil is unavailable the
        lists are empty, the counts are zero and an "error" key is added.
    """
    allowed = []
    suspicious = []
    malicious = []

    def _result():
        return {
            "allowed": allowed,
            "suspicious": suspicious,
            "malicious": malicious,
            "summary": {
                "total": len(allowed) + len(suspicious) + len(malicious),
                "allowed_count": len(allowed),
                "suspicious_count": len(suspicious),
                "malicious_count": len(malicious),
            },
        }

    if not psutil:
        # Same shape as the normal result so callers can read "summary" on
        # either path.
        result = _result()
        result["error"] = "psutil not available"
        return result

    # Dot-directories that legitimately hold executables.
    hidden_dir_exemptions = (".npm", ".nvm", ".pm2", ".local/bin", ".pyenv", ".cache/pip")
    attrs = ["pid", "name", "cpu_percent", "memory_percent",
             "status", "create_time", "cmdline", "username"]

    for proc in psutil.process_iter(attrs):
        try:
            info = proc.info
            name = info["name"] or ""
            cmdline = info.get("cmdline") or []
            cmdline_str = " ".join(cmdline)
            create_time = info.get("create_time") or 0

            entry = {
                "pid": info["pid"],
                "name": name,
                "cmdline": cmdline_str[:300],
                "cpu_pct": info.get("cpu_percent") or 0,
                "mem_pct": round(info.get("memory_percent") or 0, 1),
                "username": info.get("username") or "",
                "started": datetime.fromtimestamp(create_time, tz=timezone.utc).isoformat() if create_time else None,
            }

            # Check blocked signatures first
            blocked_matches = _check_blocked(cmdline_str)
            if blocked_matches:
                entry["confidence"] = 95
                entry["reasoning"] = "Matches blocked signature: {}".format(", ".join(blocked_matches[:3]))
                entry["matched_patterns"] = blocked_matches
                malicious.append(entry)
                continue

            # Check if running from hidden path
            if cmdline:
                exe_path = cmdline[0]
                in_hidden_dir = re.search(r"/\.[a-z_]+/", exe_path)
                if in_hidden_dir and not any(ok in exe_path for ok in hidden_dir_exemptions):
                    entry["confidence"] = 60
                    entry["reasoning"] = "Running from hidden directory: {}".format(exe_path[:100])
                    suspicious.append(entry)
                    continue

            # Check if allowed
            if _is_allowed_process(name, cmdline):
                entry["confidence"] = 100
                allowed.append(entry)
            else:
                # Unknown process — suspicious
                entry["confidence"] = 30
                entry["reasoning"] = "Unknown process not in allowed list"
                suspicious.append(entry)

        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            # The process vanished or is unreadable; skip it.
            continue

    return _result()
191
+
192
+
193
+ # =============================================================================
194
+ # Threat Scoring
195
+ # =============================================================================
196
+
197
def compute_threat_score(classification, gpu_metrics=None):
    """Compute a weighted 0-100 threat score.

    Components and their caps: malicious processes (40 each, max 80),
    suspicious processes running from hidden directories (10 each, max 20),
    a GPU above 90% utilisation with a process not in the allowed list (40),
    and established connections to suspicious ports (15 each, max 30).

    Args:
        classification: Result dict of classify_processes(); only its
            "malicious" and "suspicious" lists are read.
        gpu_metrics: Optional dict with a "gpus" list; each GPU is read for
            "gpu_util_pct" and "processes" (entries with a "name").

    Returns:
        dict with "score" (capped at 100), "level" ("critical" from 70,
        "high" from 50, "medium" from 30, otherwise "low") and "reasons".
    """
    score = 0
    reasons = []

    # Mining signatures detected (weight: 80)
    malicious_count = len(classification.get("malicious", []))
    if malicious_count > 0:
        score += min(80, malicious_count * 40)
        reasons.append("{} malicious process(es) detected".format(malicious_count))

    # Suspicious processes in hidden paths (weight: 20)
    hidden_path_count = sum(
        1 for s in classification.get("suspicious", [])
        if "hidden directory" in (s.get("reasoning") or "")
    )
    if hidden_path_count > 0:
        score += min(20, hidden_path_count * 10)
        reasons.append("{} process(es) running from hidden paths".format(hidden_path_count))

    # GPU saturation with unknown/malicious process (weight: 40)
    if gpu_metrics:
        gpu_flagged = False
        for gpu in gpu_metrics.get("gpus") or []:
            # A missing or null reading counts as idle instead of raising.
            util = gpu.get("gpu_util_pct") or 0
            if util <= 90:
                continue
            unknown_gpu_procs = []
            for gp in gpu.get("processes") or []:
                gp_name = gp.get("name") or ""
                if not _is_allowed_process(gp_name, []):
                    unknown_gpu_procs.append(gp_name)
            if unknown_gpu_procs:
                gpu_flagged = True
                reasons.append("GPU at {}% with unknown process(es): {}".format(
                    util, ", ".join(unknown_gpu_procs[:3])
                ))
        if gpu_flagged:
            # Added once however many GPUs are affected, so this component
            # stays within its weight like the others.
            score += 40

    # Suspicious outbound connections (weight: 30)
    try:
        suspicious_conns = _check_suspicious_connections()
        if suspicious_conns:
            score += min(30, len(suspicious_conns) * 15)
            reasons.append("{} suspicious outbound connection(s)".format(len(suspicious_conns)))
    except Exception:
        # Best effort: a failed connection check must not discard the rest
        # of the score.
        pass

    score = min(100, score)
    if score >= 70:
        level = "critical"
    elif score >= 50:
        level = "high"
    elif score >= 30:
        level = "medium"
    else:
        level = "low"

    return {
        "score": score,
        "level": level,
        "reasons": reasons,
    }
250
+
251
+
252
def _check_suspicious_connections():
    """List established TCP connections whose remote port is in SUSPICIOUS_PORTS.

    Returns a list of dicts with "pid", "process", "remote_addr" and
    "local_port". The list is empty when psutil is unavailable; if the
    enumeration fails part-way, whatever was collected so far is returned.
    """
    hits = []
    if not psutil:
        return hits

    try:
        for conn in psutil.net_connections(kind="tcp"):
            if conn.status != "ESTABLISHED" or not conn.raddr:
                continue
            if conn.raddr.port not in SUSPICIOUS_PORTS:
                continue

            # The process name is optional; an unreadable process leaves it empty.
            owner = ""
            try:
                if conn.pid:
                    owner = psutil.Process(conn.pid).name()
            except Exception:
                pass

            local_port = conn.laddr.port if conn.laddr else None
            hits.append({
                "pid": conn.pid,
                "process": owner,
                "remote_addr": "{}:{}".format(conn.raddr.ip, conn.raddr.port),
                "local_port": local_port,
            })
    except Exception:
        pass
    return hits
276
+
277
+
278
+ # =============================================================================
279
+ # Persistence Scanner
280
+ # =============================================================================
281
+
282
def _scan_crontab(findings):
    """Append one finding per active line of the current user's crontab."""
    try:
        raw = subprocess.run(
            "crontab -l 2>/dev/null", shell=True, capture_output=True, text=True, timeout=5
        ).stdout
        if not raw.strip() or "no crontab" in raw.lower():
            return
        # Split the unstripped output so "line_number" is the position in the
        # crontab as listed, which is how action_remove_cron addresses lines.
        for index, raw_line in enumerate(raw.split("\n")):
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            confidence = 20
            reasoning = "Standard cron entry"
            blocked = _check_blocked(line)
            if blocked:
                confidence = 90
                reasoning = "Cron entry matches blocked signature: {}".format(", ".join(blocked[:2]))
            elif re.search(r"/\.[a-z_]+/", line):
                confidence = 70
                reasoning = "Cron entry references hidden directory"
            elif re.search(r"(curl|wget)\s.*\|\s*(sh|bash)", line):
                confidence = 80
                reasoning = "Cron entry downloads and executes script"
            elif re.search(r"@reboot", line):
                confidence = 40
                reasoning = "Runs at system boot"
            findings.append({
                "type": "crontab",
                "line_number": index + 1,
                "content": line[:300],
                "confidence": confidence,
                "reasoning": reasoning,
                "suspicious": confidence >= 50,
            })
    except Exception as e:
        findings.append({"type": "crontab", "error": str(e)})


def _scan_systemd_services(findings):
    """Append one finding per unit file matching /etc/systemd/system/*.service."""
    try:
        listing = subprocess.run(
            "ls /etc/systemd/system/*.service 2>/dev/null", shell=True,
            capture_output=True, text=True, timeout=5
        ).stdout.strip()
        if not listing:
            return
        known_services = {
            "docker.service", "nginx.service", "sshd.service", "cron.service",
            "getty@.service", "NetworkManager.service", "systemd-resolved.service",
            "ufw.service", "rsyslog.service", "snapd.service",
        }
        for raw_path in listing.split("\n"):
            svc_path = raw_path.strip()
            svc_name = os.path.basename(svc_path)
            if not svc_name:
                continue
            confidence = 10
            reasoning = "Known system service"
            if svc_name not in known_services:
                # Read the unit file directly; the path is never passed to a shell.
                try:
                    with open(svc_path, "r", errors="replace") as handle:
                        content = handle.read()
                except OSError:
                    confidence = 30
                    reasoning = "Could not read service file"
                else:
                    blocked = _check_blocked(content)
                    if blocked:
                        confidence = 90
                        reasoning = "Service matches blocked signature: {}".format(", ".join(blocked[:2]))
                    elif re.search(r"/\.[a-z_]+/", content):
                        confidence = 65
                        reasoning = "Service references hidden directory"
                    elif re.search(r"[a-z0-9]{20,}", svc_name):
                        confidence = 50
                        reasoning = "Service has random/obfuscated name"
                    else:
                        confidence = 25
                        reasoning = "Non-standard service"

            findings.append({
                "type": "systemd_service",
                "name": svc_name,
                "path": svc_path,
                "confidence": confidence,
                "reasoning": reasoning,
                "suspicious": confidence >= 50,
            })
    except Exception as e:
        findings.append({"type": "systemd_service", "error": str(e)})


def _scan_pm2(findings):
    """Append one finding per process reported by `pm2 jlist`."""
    try:
        pm2_raw = subprocess.run(
            "pm2 jlist 2>/dev/null", shell=True, capture_output=True, text=True, timeout=10
        ).stdout.strip()
        if not pm2_raw or not pm2_raw.startswith("["):
            return
        known_pm2 = {"yourstudio-gpu", "debugger-agent", "comfyui", "llama-server", "chatterbox"}
        for proc in json.loads(pm2_raw):
            # Null values in the JSON are treated like missing keys.
            name = proc.get("name") or ""
            pm2_env = proc.get("pm2_env") or {}
            script = pm2_env.get("pm_exec_path") or ""
            confidence = 10
            reasoning = "Known PM2 process"
            if name.lower() not in known_pm2:
                if _check_blocked(name + " " + script):
                    confidence = 90
                    reasoning = "PM2 process matches blocked signature"
                elif re.search(r"/\.[a-z_]+/", script):
                    confidence = 65
                    reasoning = "PM2 process runs from hidden directory"
                else:
                    confidence = 30
                    reasoning = "Unknown PM2 process"
            findings.append({
                "type": "pm2_process",
                "name": name,
                "script": script[:200],
                "status": pm2_env.get("status", "unknown"),
                "confidence": confidence,
                "reasoning": reasoning,
                "suspicious": confidence >= 50,
            })
    except Exception as e:
        findings.append({"type": "pm2_process", "error": str(e)})


def _scan_authorized_keys(findings):
    """Append one finding per key in the current user's and root's authorized_keys."""
    try:
        candidates = [
            os.path.expanduser("~/.ssh/authorized_keys"),
            "/root/.ssh/authorized_keys",
        ]
        # Both candidates are the same file when running as root; scan it once.
        seen = set()
        for ak_path in candidates:
            resolved = os.path.realpath(ak_path)
            if resolved in seen or not os.path.exists(ak_path):
                continue
            seen.add(resolved)
            with open(ak_path, "r", errors="replace") as f:
                keys = f.readlines()
            for index, raw_key in enumerate(keys):
                key = raw_key.strip()
                if not key or key.startswith("#"):
                    continue
                parts = key.split()
                comment = parts[-1] if len(parts) >= 3 else "no comment"
                confidence = 20
                reasoning = "SSH key with comment: {}".format(comment[:50])
                # Check for suspicious patterns
                if "command=" in key:
                    confidence = 60
                    reasoning = "SSH key with forced command — may be used for persistence"
                elif comment in ("", "no comment") or re.search(r"[a-z0-9]{20,}@", comment):
                    confidence = 45
                    reasoning = "SSH key with generic/suspicious comment"
                findings.append({
                    "type": "ssh_authorized_key",
                    "file": ak_path,
                    "line": index + 1,
                    "key_type": parts[0] if parts else "unknown",
                    "comment": comment[:100],
                    "confidence": confidence,
                    "reasoning": reasoning,
                    "suspicious": confidence >= 50,
                })
    except Exception as e:
        findings.append({"type": "ssh_authorized_key", "error": str(e)})


def persistence_scan():
    """Scan for persistence mechanisms and return findings with confidence scores.

    Four sources are inspected in order: the current user's crontab, unit
    files matching /etc/systemd/system/*.service, PM2 processes, and SSH
    authorized_keys. A source that fails contributes a single
    {"type": ..., "error": ...} finding and the scan continues.

    Returns:
        dict with "findings" (all findings, including error entries),
        "suspicious_count" (findings with confidence >= 50), "total_checked"
        (number of findings) and a human-readable "summary".
    """
    findings = []
    _scan_crontab(findings)
    _scan_systemd_services(findings)
    _scan_pm2(findings)
    _scan_authorized_keys(findings)

    suspicious_findings = [f for f in findings if f.get("suspicious")]
    if suspicious_findings:
        summary = "Found {} suspicious persistence mechanism(s)".format(len(suspicious_findings))
    else:
        summary = "No suspicious persistence mechanisms found"
    return {
        "findings": findings,
        "suspicious_count": len(suspicious_findings),
        "total_checked": len(findings),
        "summary": summary,
    }
457
+
458
+
459
+ # =============================================================================
460
+ # Security Action Handlers (with built-in verification)
461
+ # =============================================================================
462
+
463
# Directory that quarantined files and directories are moved into.
QUARANTINE_DIR = "/root/quarantine"

# Only paths located under one of these directories may be quarantined.
ALLOWED_QUARANTINE_PARENTS = {
    "/home",
    "/opt",
    "/root",
    "/tmp",
    "/var/tmp",
    "/workspace",
}
467
+
468
+
469
+ def _run_cmd(cmd, timeout=10):
470
+ """Run a shell command, return stdout+stderr."""
471
+ try:
472
+ result = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=timeout)
473
+ return (result.stdout + result.stderr).strip()
474
+ except subprocess.TimeoutExpired:
475
+ return "[timeout after {}s]".format(timeout)
476
+ except Exception as e:
477
+ return "[error: {}]".format(e)
478
+
479
+
480
def action_kill_process(payload):
    """Kill a process by PID with verification.

    Args:
        payload: dict carrying "pid" (int or numeric string).

    Returns:
        dict with "success", "output" and "verification" (a dict with
        "check", "passed" and "details"). A missing, non-numeric or
        non-positive PID, a protected PID (this process or PID 1), missing
        psutil and permission problems are all reported as failures in this
        dict; nothing is raised for them.
    """
    pid = payload.get("pid") if payload else None
    if not pid:
        return {"success": False, "output": "Missing 'pid' in payload", "verification": {"check": "pid_exists", "passed": False, "details": "No PID provided"}}

    try:
        pid = int(pid)
    except (TypeError, ValueError):
        pid = None
    # Zero and negative PIDs address process groups in kill(2); never pass them on.
    if pid is None or pid <= 0:
        return {"success": False, "output": "Invalid 'pid' in payload", "verification": {"check": "pid_valid", "passed": False, "details": "PID must be a positive integer"}}

    # Safety: don't kill ourselves or init
    if pid in (os.getpid(), 1):
        return {"success": False, "output": "Cannot kill this process (protected PID)", "verification": {"check": "protected", "passed": False, "details": "PID {} is protected".format(pid)}}

    if not psutil:
        return {"success": False, "output": "psutil not available", "verification": {"check": "psutil", "passed": False, "details": "psutil is required to inspect and kill processes"}}

    denied = {"success": False, "output": "Permission denied killing PID {}".format(pid), "verification": {"check": "kill", "passed": False, "details": "Insufficient permissions"}}

    try:
        proc = psutil.Process(pid)
        proc_name = proc.name()
        proc_cmdline = " ".join(proc.cmdline()[:5])
    except psutil.NoSuchProcess:
        return {"success": False, "output": "PID {} does not exist".format(pid), "verification": {"check": "pid_exists", "passed": True, "details": "Process already gone"}}
    except psutil.AccessDenied:
        return denied

    try:
        # Process.kill() sends SIGKILL and first checks that the PID has not
        # been reused by a different process since `proc` was created.
        proc.kill()
        time.sleep(0.5)
    except psutil.NoSuchProcess:
        pass
    except (psutil.AccessDenied, PermissionError):
        return denied

    # Verify. A killed process whose parent has not reaped it yet is a
    # zombie: it still has a PID but is no longer running.
    try:
        still_alive = proc.is_running() and proc.status() != psutil.STATUS_ZOMBIE
    except psutil.NoSuchProcess:
        still_alive = False

    return {
        "success": not still_alive,
        "output": "Killed process {} (PID {}, cmd: {})".format(proc_name, pid, proc_cmdline[:100]) if not still_alive else "Failed to kill PID {}".format(pid),
        "verification": {
            "check": "pid_gone",
            "passed": not still_alive,
            "details": "PID {} is {}".format(pid, "gone" if not still_alive else "STILL RUNNING"),
        },
    }
518
+
519
+
520
def action_quarantine_path(payload):
    """Move a file/directory to quarantine instead of deleting.

    Args:
        payload: dict carrying "path" (string).

    Returns:
        dict with "success", "output" and "verification" (a dict with
        "check", "passed" and "details"). The move is refused unless the
        path, after resolving symlinks in its parent directories, lies under
        one of ALLOWED_QUARANTINE_PARENTS. The destination is
        QUARANTINE_DIR/<UTC timestamp>_<basename>.
    """
    path = payload.get("path") if payload else None
    if not path:
        return {"success": False, "output": "Missing 'path' in payload", "verification": {"check": "path", "passed": False, "details": "No path provided"}}
    if not isinstance(path, str):
        return {"success": False, "output": "Invalid 'path' in payload", "verification": {"check": "path", "passed": False, "details": "Path must be a string"}}

    path = os.path.abspath(path)
    # Resolve symlinks in the parent directories so a link such as
    # /tmp/link -> /etc cannot carry the move outside the allowed
    # directories. The final component is kept unresolved, so a symlink
    # given as the target is quarantined itself, not what it points to.
    path = os.path.join(os.path.realpath(os.path.dirname(path)), os.path.basename(path))

    # Safety: only allow quarantining from specific parent directories.
    # Allowed parents are accepted by their resolved location too, in case
    # one of them is itself a symlink.
    allowed_roots = set(ALLOWED_QUARANTINE_PARENTS)
    allowed_roots.update(os.path.realpath(parent) for parent in ALLOWED_QUARANTINE_PARENTS)
    if not any(path.startswith(root + "/") for root in allowed_roots):
        return {"success": False, "output": "Path {} is not in an allowed directory for quarantine".format(path), "verification": {"check": "allowed_path", "passed": False, "details": "Only paths under {} can be quarantined".format(", ".join(sorted(ALLOWED_QUARANTINE_PARENTS)))}}

    # lexists() is also true for dangling symlinks, which can still be moved.
    if not os.path.lexists(path):
        return {"success": False, "output": "Path does not exist: {}".format(path), "verification": {"check": "exists", "passed": False, "details": "Path not found"}}

    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    dest_name = "{}_{}".format(timestamp, os.path.basename(path))
    dest = os.path.join(QUARANTINE_DIR, dest_name)

    try:
        os.makedirs(QUARANTINE_DIR, exist_ok=True)
        shutil.move(path, dest)
    except Exception as e:
        return {"success": False, "output": "Failed to quarantine: {}".format(e), "verification": {"check": "move", "passed": False, "details": str(e)}}

    source_gone = not os.path.lexists(path)
    dest_exists = os.path.lexists(dest)
    return {
        "success": source_gone and dest_exists,
        "output": "Quarantined {} -> {}".format(path, dest),
        "verification": {
            "check": "quarantine",
            "passed": source_gone and dest_exists,
            "details": "Source removed: {}, Quarantined at: {}".format(source_gone, dest),
        },
    }
561
+
562
+
563
def action_disable_service(payload):
    """Stop and disable a systemd service with verification.

    Args:
        payload: dict carrying "service" (unit name, with or without the
            ".service" suffix).

    Returns:
        dict with "success", "output" and "verification" (a dict with
        "check", "passed" and "details"). Success means `systemctl is-active`
        reports inactive/dead/failed and `systemctl is-enabled` reports
        disabled/masked. A name containing characters outside
        [a-zA-Z0-9_-@.] or starting with "-" is rejected without running
        any command.
    """
    service = payload.get("service") if payload else None
    if not service:
        return {"success": False, "output": "Missing 'service' in payload", "verification": {"check": "service", "passed": False, "details": "No service name provided"}}

    # Reject unsafe names instead of stripping characters from them:
    # stripping would silently act on a different unit than the one
    # requested, and a leading "-" would be parsed by systemctl as an option.
    if not isinstance(service, str) or not re.fullmatch(r"[a-zA-Z0-9_@.][a-zA-Z0-9_\-@.]*", service):
        return {"success": False, "output": "Invalid service name in payload", "verification": {"check": "service", "passed": False, "details": "Service names may only contain letters, digits, '_', '-', '@' and '.', and must not start with '-'"}}
    if not service.endswith(".service"):
        service += ".service"

    # Run the two steps independently so the unit is still disabled when the
    # stop step fails.
    stop_output = _run_cmd("sudo systemctl stop {}".format(service), timeout=15)
    disable_output = _run_cmd("sudo systemctl disable {}".format(service), timeout=15)
    output = "\n".join(part for part in (stop_output, disable_output) if part)
    time.sleep(1)

    # Verify
    status = _run_cmd("systemctl is-active {} 2>/dev/null".format(service), timeout=5)
    is_inactive = status.strip() in ("inactive", "dead", "failed")
    enabled = _run_cmd("systemctl is-enabled {} 2>/dev/null".format(service), timeout=5)
    is_disabled = enabled.strip() in ("disabled", "masked")

    return {
        "success": is_inactive and is_disabled,
        "output": "Service {}: status={}, enabled={}.\n{}".format(service, status.strip(), enabled.strip(), output),
        "verification": {
            "check": "service_state",
            "passed": is_inactive and is_disabled,
            "details": "Active: {}, Enabled: {}".format(status.strip(), enabled.strip()),
        },
    }
592
+
593
+
594
def action_remove_cron(payload):
    """Remove a crontab entry by line number with verification.

    Args:
        payload: dict carrying "line", the 1-based line number within the
            output of `crontab -l`.

    Returns:
        dict with "success", "output" and "verification" (a dict with
        "check", "passed" and "details"). Success means the crontab was
        rewritten and now holds exactly one fewer line equal to the removed
        one.
    """
    line_num = payload.get("line") if payload else None
    if not line_num:
        return {"success": False, "output": "Missing 'line' in payload", "verification": {"check": "line", "passed": False, "details": "No line number provided"}}

    try:
        line_num = int(line_num)
    except (TypeError, ValueError):
        return {"success": False, "output": "Invalid 'line' in payload", "verification": {"check": "line", "passed": False, "details": "Line number must be an integer"}}

    def _cron_lines(text):
        # The newline that ends the last entry is not a line of its own.
        parts = text.split("\n")
        if parts and parts[-1] == "":
            parts.pop()
        return parts

    cron = subprocess.run("crontab -l 2>/dev/null", shell=True, capture_output=True, text=True, timeout=5).stdout
    lines = _cron_lines(cron)
    if line_num < 1 or line_num > len(lines):
        return {"success": False, "output": "Line {} out of range (crontab has {} lines)".format(line_num, len(lines)), "verification": {"check": "range", "passed": False, "details": "Invalid line number"}}

    removed_line = lines[line_num - 1]
    new_lines = lines[:line_num - 1] + lines[line_num:]
    # Every entry, including the last, is terminated by a newline.
    new_cron = "\n".join(new_lines) + "\n"

    # Feed the new crontab on stdin. Building an `echo '...' | crontab -`
    # shell string would let quotes in existing entries break out of the
    # quoting and run as shell code.
    try:
        write = subprocess.run(["crontab", "-"], input=new_cron, capture_output=True, text=True, timeout=5)
    except (OSError, subprocess.TimeoutExpired) as e:
        return {"success": False, "output": "Failed to write crontab: {}".format(e), "verification": {"check": "write", "passed": False, "details": str(e)}}

    # Verify by counting identical lines, so blank lines and entries that
    # occur more than once are judged correctly.
    updated = subprocess.run("crontab -l 2>/dev/null", shell=True, capture_output=True, text=True, timeout=5).stdout
    target = removed_line.strip()
    before = sum(1 for entry in lines if entry.strip() == target)
    after = sum(1 for entry in _cron_lines(updated) if entry.strip() == target)
    line_gone = write.returncode == 0 and after == before - 1

    return {
        "success": line_gone,
        "output": "Removed cron line {}: {}\nUpdated crontab:\n{}".format(line_num, target, updated),
        "verification": {
            "check": "line_removed",
            "passed": line_gone,
            "details": "Line '{}' {}".format(target[:80], "removed" if line_gone else "STILL PRESENT"),
        },
    }
626
+
627
+
628
def action_restart_service(payload):
    """Restart a systemd service and verify that it comes back active.

    Args:
        payload: dict carrying a "service" key with the unit name; the
            ".service" suffix is appended when absent.

    Returns:
        dict with "success" (bool), "output" (summary plus command output)
        and "verification" (check name, pass flag, details).
    """
    service = payload.get("service") if payload else None
    if not service:
        return {
            "success": False,
            "output": "Missing 'service' in payload",
            "verification": {"check": "service", "passed": False, "details": "No service name provided"},
        }

    # Strip everything outside the allowed unit-name characters before the
    # name is placed on a command line.
    unit = re.sub(r"[^a-zA-Z0-9_\-@.]", "", str(service))
    # A name that sanitizes to nothing would become ".service", and a leading
    # "-" would be parsed by systemctl as an option rather than a unit.
    if not unit or unit == ".service" or unit.startswith("-"):
        return {
            "success": False,
            "output": "Invalid service name: {!r}".format(service),
            "verification": {"check": "service", "passed": False, "details": "Service name is empty or unsafe after sanitization"},
        }
    if not unit.endswith(".service"):
        unit += ".service"

    output = _run_cmd("sudo systemctl restart {}".format(unit), timeout=15)
    # Give the unit a moment to settle before asking for its state.
    time.sleep(3)

    status = _run_cmd("systemctl is-active {} 2>/dev/null".format(unit), timeout=5)
    is_active = status.strip() == "active"

    return {
        "success": is_active,
        "output": "Restart {}: {}\n{}".format(unit, status.strip(), output),
        "verification": {
            "check": "service_active",
            "passed": is_active,
            "details": "Service status: {}".format(status.strip()),
        },
    }
653
+
654
+
655
def action_block_outbound(payload):
    """Block outbound connections to an IP (or network) or to a port via UFW.

    Args:
        payload: dict with an "ip" key (IPv4/IPv6 address or CIDR network)
            and/or a "port" key (1-65535). When both are given, "ip" wins
            and the port is ignored.

    Returns:
        dict with "success" (bool), "output" (command and its output) and
        "verification" (check name, pass flag, details). Values that are not
        a valid address/network or port are rejected without running anything.
    """
    import ipaddress

    ip = payload.get("ip") if payload else None
    port = payload.get("port") if payload else None

    if not ip and not port:
        return {
            "success": False,
            "output": "Missing 'ip' or 'port' in payload",
            "verification": {"check": "params", "passed": False, "details": "Need ip or port"},
        }

    # Both values end up inside a sudo command line, so they are parsed and
    # re-rendered from the validated object, never interpolated as received.
    if ip:
        candidate = str(ip).strip()
        try:
            # IPv6 scope ids ("%eth0") accept arbitrary text, so refuse them.
            if "%" in candidate:
                raise ValueError("scoped addresses are not supported")
            if "/" in candidate:
                target = str(ipaddress.ip_network(candidate, strict=False))
            else:
                target = str(ipaddress.ip_address(candidate))
        except ValueError:
            return {
                "success": False,
                "output": "Invalid 'ip' in payload: {!r}".format(ip),
                "verification": {"check": "params", "passed": False, "details": "Not a valid IP address or network"},
            }
        cmd = "sudo ufw deny out to {}".format(target)
    else:
        try:
            port_num = int(str(port).strip())
        except ValueError:
            port_num = 0
        if not 1 <= port_num <= 65535:
            return {
                "success": False,
                "output": "Invalid 'port' in payload: {!r}".format(port),
                "verification": {"check": "params", "passed": False, "details": "Port must be an integer in 1-65535"},
            }
        target = str(port_num)
        cmd = "sudo ufw deny out {}".format(target)

    output = _run_cmd(cmd, timeout=10)

    # Verify: look for the target as a whole token, so that port "25" does not
    # match "2525" or an octet of some unrelated address.
    # NOTE(review): assumes `ufw status` prints the address in the same
    # normalized form as Python's ipaddress module - confirm for IPv6.
    rules = _run_cmd("sudo ufw status verbose 2>/dev/null", timeout=5)
    token = r"(?<![\w.:/]){}(?![\w.:/])".format(re.escape(target))
    rule_exists = re.search(token, rules) is not None

    return {
        "success": rule_exists,
        "output": "UFW rule: {}\n{}".format(cmd, output),
        "verification": {
            "check": "ufw_rule",
            "passed": rule_exists,
            "details": "Rule for {} {}".format(target, "found in UFW" if rule_exists else "NOT FOUND"),
        },
    }
684
+
685
+
686
def action_list_connections():
    """Report active network connections together with their owning process.

    Returns:
        dict with "success" (always True) and "output": the raw `ss -tunp`
        listing (capped at 10000 characters) followed by a JSON summary of
        established TCP connections gathered through psutil, when available.
    """
    raw_listing = _run_cmd("ss -tunp", timeout=10)

    # Structured view: best-effort, any psutil failure keeps what was collected.
    established = []
    if psutil:
        try:
            for sock in psutil.net_connections(kind="tcp"):
                if sock.status != "ESTABLISHED" or not sock.raddr:
                    continue

                owner = ""
                try:
                    if sock.pid:
                        owner = psutil.Process(sock.pid).name()
                except Exception:
                    # Process may have exited or be inaccessible; leave the name blank.
                    pass

                if sock.laddr:
                    local_addr = "{}:{}".format(sock.laddr.ip, sock.laddr.port)
                else:
                    local_addr = ""
                remote_addr = "{}:{}".format(sock.raddr.ip, sock.raddr.port)

                record = {
                    "pid": sock.pid,
                    "process": owner,
                    "local": local_addr,
                    "remote": remote_addr,
                    "status": sock.status,
                }
                remote_port = sock.raddr.port
                if remote_port in SUSPICIOUS_PORTS:
                    record["suspicious"] = True
                    record["reason"] = "Port {} is a known mining pool port".format(remote_port)
                established.append(record)
        except Exception:
            pass

    flagged = [record for record in established if record.get("suspicious")]
    summary = {
        "connections": established[:100],
        "total": len(established),
        "suspicious": flagged,
    }
    structured = json.dumps(summary, indent=2)

    return {
        "success": True,
        "output": "{}\n\n--- Structured ---\n{}".format(raw_listing[:10000], structured),
    }
725
+
726
+
727
def action_security_scan(gpu_metrics=None):
    """Run the full security scan and return it as a JSON report.

    Combines process classification, the persistence scan and the threat
    score computed from the classification plus the optional GPU metrics.

    Args:
        gpu_metrics: optional GPU metrics forwarded to the threat scorer.

    Returns:
        dict with "success" (always True) and "output" (JSON text).
    """
    classification = classify_processes()
    persistence = persistence_scan()
    threat = compute_threat_score(classification, gpu_metrics)

    # Suspicious processes are capped at 20 entries to keep the report compact.
    process_section = {
        "summary": classification.get("summary"),
        "malicious": classification.get("malicious", []),
        "suspicious": classification.get("suspicious", [])[:20],
    }
    report = {
        "threat_score": threat,
        "process_classification": process_section,
        "persistence": persistence,
    }

    return {
        "success": True,
        "output": json.dumps(report, indent=2),
    }
745
+
746
+
747
# =============================================================================
# Cost Estimation
# =============================================================================

# Rough hourly GPU price in USD, applied per GPU below; override it with the
# DEBUGGER_GPU_HOURLY_RATE environment variable.
HOURLY_GPU_RATE = float(os.environ.get("DEBUGGER_GPU_HOURLY_RATE", "1.50"))  # $/hr


def estimate_cost_impact(gpu_metrics=None, classification=None):
    """Estimate resource waste caused by processes classified as malicious.

    This is a rough estimate: a GPU hosting at least one malicious process
    has its whole current utilisation attributed to the theft.

    Args:
        gpu_metrics: dict with a "gpus" list; each GPU entry may carry
            "gpu_util_pct" and a "processes" list of dicts with "pid".
        classification: dict with a "malicious" list of process dicts
            ("pid", "cpu_pct").

    Returns:
        None when either argument is empty or no malicious process exists;
        otherwise a dict with the stolen GPU and CPU percentages and the
        estimated hourly and daily cost in USD.
    """
    if not gpu_metrics or not classification:
        return None

    malicious = classification.get("malicious", [])
    if not malicious:
        return None

    # Missing or None metrics count as zero instead of raising TypeError.
    total_cpu = sum((proc.get("cpu_pct") or 0) for proc in malicious)

    # Entries without a pid are ignored so that None never matches None.
    malicious_pids = {proc.get("pid") for proc in malicious if proc.get("pid") is not None}

    # Count each affected GPU once, however many malicious processes (or
    # repeated process entries) it hosts; adding its utilisation per match
    # would push the "stolen" share of one GPU above its real utilisation.
    total_gpu_util = 0
    for gpu in gpu_metrics.get("gpus") or []:
        gpu_pids = {gp.get("pid") for gp in gpu.get("processes") or []}
        if malicious_pids & gpu_pids:
            total_gpu_util += gpu.get("gpu_util_pct") or 0

    waste_per_hour = (total_gpu_util / 100.0) * HOURLY_GPU_RATE

    return {
        "gpu_util_stolen_pct": total_gpu_util,
        "cpu_stolen_pct": round(total_cpu, 1),
        "estimated_waste_per_hour_usd": round(waste_per_hour, 2),
        "estimated_waste_per_day_usd": round(waste_per_hour * 24, 2),
    }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: debugger-help
3
- Version: 4.1.1
3
+ Version: 4.2.0
4
4
  Summary: debugger.help VPS Agent — Deep system monitoring for logs, GPU, PM2, Docker, and more
5
5
  Author: debugger.help
6
6
  License: MIT
@@ -2,6 +2,7 @@ README.md
2
2
  pyproject.toml
3
3
  debugger_help/__init__.py
4
4
  debugger_help/agent.py
5
+ debugger_help/security.py
5
6
  debugger_help.egg-info/PKG-INFO
6
7
  debugger_help.egg-info/SOURCES.txt
7
8
  debugger_help.egg-info/dependency_links.txt
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "debugger-help"
7
- version = "4.1.1"
7
+ version = "4.2.0"
8
8
  description = "debugger.help VPS Agent — Deep system monitoring for logs, GPU, PM2, Docker, and more"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
File without changes
File without changes