castrel-proxy 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -178,7 +178,7 @@ def pair(
178
178
  @app.command()
179
179
  def start(
180
180
  daemon: bool = typer.Option(
181
- True, "--daemon/--foreground", "-d/-f", help="Run in background (default) or foreground"
181
+ False, "--daemon/--foreground", "-d/-f", help="Run in background or foreground (default)"
182
182
  ),
183
183
  ):
184
184
  """
@@ -186,14 +186,14 @@ def start(
186
186
 
187
187
  Start bridge and connect to paired server.
188
188
 
189
- Run in background (default):
189
+ Run in foreground (default):
190
190
  castrel-proxy start
191
- castrel-proxy start --daemon
192
- castrel-proxy start -d
193
-
194
- Run in foreground:
195
191
  castrel-proxy start --foreground
196
192
  castrel-proxy start -f
193
+
194
+ Run in background:
195
+ castrel-proxy start --daemon
196
+ castrel-proxy start -d
197
197
  """
198
198
  config = get_config()
199
199
 
@@ -4,6 +4,7 @@ from .client_id import get_client_id, get_machine_metadata
4
4
  from .config import Config, ConfigError, get_config
5
5
  from .daemon import DaemonManager, get_daemon_manager
6
6
  from .executor import CommandExecutor, ExecutionResult
7
+ from .openclaw import OpenClawChecker, HealthStatus
7
8
 
8
9
  __all__ = [
9
10
  "get_client_id",
@@ -15,4 +16,6 @@ __all__ = [
15
16
  "get_daemon_manager",
16
17
  "CommandExecutor",
17
18
  "ExecutionResult",
19
+ "OpenClawChecker",
20
+ "HealthStatus",
18
21
  ]
@@ -4,7 +4,7 @@ Configuration File Management Module
4
4
  Handles reading, writing, and validating the ~/.castrel/config.yaml configuration file
5
5
  """
6
6
 
7
- from datetime import datetime
7
+ from datetime import datetime, UTC
8
8
  from pathlib import Path
9
9
  from typing import Optional
10
10
 
@@ -56,14 +56,58 @@ class Config:
56
56
  """
57
57
  self._ensure_config_dir()
58
58
 
59
+ # Load existing config to preserve openclaw settings
60
+ existing_config = {}
61
+ if self.config_file.exists():
62
+ try:
63
+ with open(self.config_file, "r", encoding="utf-8") as f:
64
+ existing_config = yaml.safe_load(f) or {}
65
+ except Exception:
66
+ # If loading fails, start with empty dict
67
+ existing_config = {}
68
+
59
69
  config_data = {
60
70
  "server_url": server_url,
61
71
  "verification_code": verification_code,
62
72
  "client_id": client_id,
63
73
  "workspace_id": workspace_id,
64
- "paired_at": datetime.utcnow().isoformat() + "Z",
74
+ "paired_at": datetime.now(UTC).isoformat() + "Z",
65
75
  }
66
76
 
77
+ # Preserve or initialize openclaw configuration
78
+ # If a key exists in the existing config, preserve it; otherwise fall back to the defaults hard-coded below (the same values the getters return)
79
+ # Note: openclaw_runtime_log_path should not be saved if empty, to allow date-based rotation
80
+
81
+ # openclaw_check_enabled
82
+ if "openclaw_check_enabled" in existing_config:
83
+ config_data["openclaw_check_enabled"] = existing_config["openclaw_check_enabled"]
84
+ else:
85
+ config_data["openclaw_check_enabled"] = False
86
+
87
+ # openclaw_config_path
88
+ if "openclaw_config_path" in existing_config:
89
+ config_data["openclaw_config_path"] = existing_config["openclaw_config_path"]
90
+ else:
91
+ config_data["openclaw_config_path"] = str(Path.home() / ".openclaw" / "openclaw.json")
92
+
93
+ # openclaw_runtime_log_path - only save if not empty
94
+ if "openclaw_runtime_log_path" in existing_config:
95
+ runtime_log_path = existing_config["openclaw_runtime_log_path"]
96
+ if runtime_log_path: # Only save if not empty
97
+ config_data["openclaw_runtime_log_path"] = runtime_log_path
98
+
99
+ # openclaw_gateway_log_path
100
+ if "openclaw_gateway_log_path" in existing_config:
101
+ config_data["openclaw_gateway_log_path"] = existing_config["openclaw_gateway_log_path"]
102
+ else:
103
+ config_data["openclaw_gateway_log_path"] = str(Path.home() / ".openclaw" / "logs" / "gateway.err.log")
104
+
105
+ # openclaw_agents_dir
106
+ if "openclaw_agents_dir" in existing_config:
107
+ config_data["openclaw_agents_dir"] = existing_config["openclaw_agents_dir"]
108
+ else:
109
+ config_data["openclaw_agents_dir"] = str(Path.home() / ".openclaw" / "agents")
110
+
67
111
  try:
68
112
  with open(self.config_file, "w", encoding="utf-8") as f:
69
113
  yaml.safe_dump(config_data, f, default_flow_style=False, allow_unicode=True)
@@ -148,6 +192,59 @@ class Config:
148
192
  """Get workspace ID"""
149
193
  return self.load()["workspace_id"]
150
194
 
195
+ def get_openclaw_check_enabled(self) -> bool:
196
+ """
197
+ Get OpenClaw check enabled status
198
+
199
+ Returns:
200
+ bool: True if OpenClaw check is enabled, False otherwise (default)
201
+ """
202
+ try:
203
+ config = self.load()
204
+ return config.get("openclaw_check_enabled", False)
205
+ except ConfigError:
206
+ # If config doesn't exist, return False
207
+ return False
208
+
209
+ def get_openclaw_config_path(self) -> str:
210
+ """
211
+ Get OpenClaw config file path
212
+
213
+ Returns:
214
+ str: Path to OpenClaw config file (default: ~/.openclaw/openclaw.json)
215
+ """
216
+ try:
217
+ config = self.load()
218
+ return config.get("openclaw_config_path", str(Path.home() / ".openclaw" / "openclaw.json"))
219
+ except ConfigError:
220
+ return str(Path.home() / ".openclaw" / "openclaw.json")
221
+
222
+ def get_openclaw_gateway_log_path(self) -> str:
223
+ """
224
+ Get OpenClaw gateway error log path
225
+
226
+ Returns:
227
+ str: Path to OpenClaw gateway error log file
228
+ """
229
+ try:
230
+ config = self.load()
231
+ return config.get("openclaw_gateway_log_path", str(Path.home() / ".openclaw" / "logs" / "gateway.err.log"))
232
+ except ConfigError:
233
+ return str(Path.home() / ".openclaw" / "logs" / "gateway.err.log")
234
+
235
+ def get_openclaw_agents_dir(self) -> str:
236
+ """
237
+ Get OpenClaw agents directory path
238
+
239
+ Returns:
240
+ str: Path to OpenClaw agents directory
241
+ """
242
+ try:
243
+ config = self.load()
244
+ return config.get("openclaw_agents_dir", str(Path.home() / ".openclaw" / "agents"))
245
+ except ConfigError:
246
+ return str(Path.home() / ".openclaw" / "agents")
247
+
151
248
 
152
249
  # Global configuration instance
153
250
  _config = Config()
@@ -0,0 +1,562 @@
1
+ """
2
+ OpenClaw Check Module
3
+
4
+ Responsible for performing OpenClaw health checks and returning structured status information
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ import re
10
+ from datetime import datetime, timedelta, timezone
11
+ from pathlib import Path
12
+ from typing import Dict, List, Optional
13
+
14
+ import file_read_backwards
15
+
16
+ from ..core.config import get_config
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ class HealthStatus:
22
+ """Health status result"""
23
+
24
+ def __init__(
25
+ self,
26
+ status: str,
27
+ message: str,
28
+ details: Optional[Dict] = None,
29
+ ):
30
+ """
31
+ Initialize health status
32
+
33
+ Args:
34
+ status: Status level ("healthy", "warning", "error")
35
+ message: Human-readable status message
36
+ details: Additional check-specific parameters
37
+ """
38
+ self.status = status
39
+ self.message = message
40
+ self.details = details or {}
41
+
42
+ def to_dict(self) -> Dict:
43
+ """Convert to dictionary format"""
44
+ return {
45
+ "status": self.status,
46
+ "message": self.message,
47
+ "details": self.details,
48
+ }
49
+
50
+
51
+ class OpenClawChecker:
52
+ """OpenClaw health checker"""
53
+
54
+ def __init__(self):
55
+ """
56
+ Initialize OpenClaw checker
57
+ """
58
+ self.config = get_config()
59
+
60
+ # Get paths from config (runtime_log_path will be dynamic, see _get_runtime_log_path)
61
+ self.openclaw_config_path = Path(self.config.get_openclaw_config_path())
62
+ self.gateway_log_path = Path(self.config.get_openclaw_gateway_log_path())
63
+ self.agents_dir = Path(self.config.get_openclaw_agents_dir())
64
+
65
+ # Store configured runtime log path (may be empty for date-based rotation)
66
+ self._configured_runtime_log_path = None
67
+ self._load_configured_runtime_log_path()
68
+
69
+ # Load runtime log rules from file
70
+ self.runtime_log_rules = self._load_runtime_log_rules()
71
+
72
+ # Get maximum window minutes from rules
73
+ self._max_window_minutes = max(
74
+ (rule.get("window_minutes", 1) for rule in self.runtime_log_rules),
75
+ default=10
76
+ )
77
+
78
+ def _load_runtime_log_rules(self) -> List[Dict]:
79
+ """
80
+ Load runtime log rules from data file
81
+
82
+ Returns:
83
+ List[Dict]: List of check rules
84
+ """
85
+ try:
86
+ # Get path to rules file
87
+ rules_file = Path(__file__).parent.parent / "data" / "openclaw_runtime_log_rules.json"
88
+ with open(rules_file, "r", encoding="utf-8") as f:
89
+ rules = json.load(f)
90
+ logger.info(f"Loaded {len(rules)} OpenClaw runtime log rules")
91
+ return rules
92
+ except Exception as e:
93
+ logger.error(f"Failed to load runtime log rules: {e}", exc_info=True)
94
+ # Return empty list if rules file cannot be loaded
95
+ return []
96
+
97
+ def _load_configured_runtime_log_path(self):
98
+ """
99
+ Load configured runtime log path from config or OpenClaw config file
100
+
101
+ Note: If openclaw_runtime_log_path is explicitly set to empty string in castrel config,
102
+ it means use date-based rotation, so we won't try OpenClaw config file.
103
+ """
104
+ # First, try to get from castrel config
105
+ try:
106
+ config_data = self.config.load()
107
+ # Check if key exists (even if value is empty string)
108
+ if "openclaw_runtime_log_path" in config_data:
109
+ configured_path = config_data["openclaw_runtime_log_path"]
110
+ if configured_path: # Only set if not empty
111
+ self._configured_runtime_log_path = Path(configured_path)
112
+ logger.debug(f"Loaded runtime log path from castrel config: {self._configured_runtime_log_path}")
113
+ # If empty string, it means use date-based rotation, so don't try OpenClaw config
114
+ return
115
+ except Exception:
116
+ pass
117
+
118
+ # If not in castrel config, try OpenClaw config file
119
+ try:
120
+ if self.openclaw_config_path.exists():
121
+ with open(self.openclaw_config_path, "r", encoding="utf-8") as f:
122
+ config = json.load(f)
123
+ logging_config = config.get("logging", {})
124
+ log_file = logging_config.get("file")
125
+ if log_file:
126
+ self._configured_runtime_log_path = Path(log_file)
127
+ logger.debug(f"Loaded runtime log path from OpenClaw config: {self._configured_runtime_log_path}")
128
+ except Exception as e:
129
+ logger.warning(f"Failed to read OpenClaw config for log path: {e}")
130
+
131
+ def _get_runtime_log_path(self) -> Path:
132
+ """
133
+ Get current runtime log path (dynamic, based on current date if not configured)
134
+
135
+ Returns:
136
+ Path: Current runtime log path
137
+ """
138
+ # If a runtime log path is configured, use it (no on-disk existence check here)
139
+ if self._configured_runtime_log_path:
140
+ return self._configured_runtime_log_path
141
+
142
+ # Otherwise, use date-based path (YYYY-MM-DD format, based on local time)
143
+ local_date = datetime.now().strftime("%Y-%m-%d")
144
+ return Path(f"/tmp/openclaw/openclaw-{local_date}.log")
145
+
146
+ def _parse_timestamp(self, timestamp_str: str) -> Optional[datetime]:
147
+ """
148
+ Parse timestamp from string (ISO format or Unix timestamp)
149
+
150
+ Args:
151
+ timestamp_str: Timestamp string
152
+
153
+ Returns:
154
+ Optional[datetime]: Parsed datetime (timezone-aware, UTC) or None if failed
155
+ """
156
+ try:
157
+ # Try ISO format first
158
+ dt = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
159
+ # Convert to UTC if timezone-aware, otherwise assume UTC
160
+ if dt.tzinfo is not None:
161
+ dt = dt.astimezone(timezone.utc)
162
+ else:
163
+ dt = dt.replace(tzinfo=timezone.utc)
164
+ return dt
165
+ except (ValueError, AttributeError):
166
+ try:
167
+ # Try Unix timestamp (milliseconds or seconds)
168
+ ts = float(timestamp_str)
169
+ if ts > 1e12: # Milliseconds
170
+ ts = ts / 1000
171
+ # fromtimestamp with tz=timezone.utc returns a timezone-aware UTC datetime directly
172
+ dt = datetime.fromtimestamp(ts, tz=timezone.utc)
173
+ return dt
174
+ except (ValueError, OSError):
175
+ return None
176
+
177
+ def _parse_jsonl_line(self, line: str) -> Optional[Dict]:
178
+ """
179
+ Parse a JSONL line
180
+
181
+ Args:
182
+ line: JSONL line string
183
+
184
+ Returns:
185
+ Optional[Dict]: Parsed JSON object or None if failed
186
+ """
187
+ try:
188
+ return json.loads(line.strip())
189
+ except (json.JSONDecodeError, AttributeError):
190
+ return None
191
+
192
+ def _read_lines_for_window(self, file_path: Path, now: datetime) -> List[str]:
193
+ """
194
+ Read lines from file covering the maximum window time using reverse reading
195
+
196
+ Strategy:
197
+ - Read backwards from end of file
198
+ - Collect lines until we reach a line older than (now - max_window_minutes)
199
+ - Keep the collected lines in reverse order (newest first); no reversal is performed
200
+
201
+ Args:
202
+ file_path: Path to file
203
+ now: Current datetime
204
+
205
+ Returns:
206
+ List[str]: List of lines covering the time window (newest first)
207
+ """
208
+ lines = []
209
+
210
+ if not file_path.exists():
211
+ return lines
212
+
213
+ try:
214
+ # Calculate cutoff time: lines older than this won't be included
215
+ cutoff_time = now - timedelta(minutes=self._max_window_minutes)
216
+
217
+ # Read backwards from end of file
218
+ # Limit to reasonable number of lines to avoid reading entire file if timestamps are missing
219
+ max_lines = 100000 # Safety limit
220
+ line_count = 0
221
+
222
+ with file_read_backwards.FileReadBackwards(file_path, encoding="utf-8") as f:
223
+ for line in f:
224
+ line_count += 1
225
+ if line_count > max_lines:
226
+ logger.warning(f"Reached max_lines limit ({max_lines}) when reading {file_path}")
227
+ break
228
+
229
+ # Parse line to check timestamp
230
+ log_entry = self._parse_jsonl_line(line)
231
+ if log_entry:
232
+ time_str = log_entry.get("time")
233
+ if time_str:
234
+ log_time = self._parse_timestamp(time_str)
235
+ if log_time:
236
+ # Stop if we've gone past the cutoff time
237
+ if log_time < cutoff_time:
238
+ break
239
+ # Add line to collection (newest first, no reversal needed)
240
+ lines.append(line)
241
+ else:
242
+ # Timestamp parsing failed, include the line anyway
243
+ lines.append(line)
244
+ else:
245
+ # No timestamp, include the line anyway (might be recent)
246
+ lines.append(line)
247
+ else:
248
+ # Failed to parse JSON, include the line anyway
249
+ lines.append(line)
250
+
251
+ # Keep lines in reverse order (newest first) - no reversal needed
252
+
253
+ except Exception as e:
254
+ logger.error(f"Failed to read lines from {file_path}: {e}", exc_info=True)
255
+
256
+ return lines
257
+
258
+ def _check_runtime_log(self, now: datetime) -> List[Dict]:
259
+ """
260
+ Check runtime log for issues
261
+
262
+ Args:
263
+ now: Current datetime
264
+
265
+ Returns:
266
+ List[Dict]: List of detected issues
267
+ """
268
+ issues = []
269
+
270
+ runtime_log_path = self._get_runtime_log_path()
271
+
272
+ if not runtime_log_path.exists():
273
+ return issues
274
+
275
+ try:
276
+ # Read lines covering the maximum window time
277
+ lines = self._read_lines_for_window(runtime_log_path, now)
278
+
279
+ if not lines:
280
+ return issues
281
+
282
+ # Check each rule
283
+ # Lines are in reverse order (newest first), so we iterate from newest to oldest
284
+ for rule in self.runtime_log_rules:
285
+ window_start = now - timedelta(minutes=rule["window_minutes"])
286
+ matches = 0
287
+
288
+ # Iterate from newest to oldest (lines are already in reverse order)
289
+ for line in lines:
290
+ # Parse JSONL line
291
+ log_entry = self._parse_jsonl_line(line)
292
+ if not log_entry:
293
+ continue
294
+
295
+ # Get timestamp
296
+ time_str = log_entry.get("time")
297
+ if not time_str:
298
+ continue
299
+
300
+ log_time = self._parse_timestamp(time_str)
301
+ if not log_time:
302
+ continue
303
+
304
+ # If we've gone past the window start, we can break (lines are in reverse order)
305
+ if log_time < window_start:
306
+ break
307
+
308
+ # Check if line matches pattern
309
+ log_text = json.dumps(log_entry, ensure_ascii=False)
310
+ if re.search(rule["pattern"], log_text, re.IGNORECASE):
311
+ matches += 1
312
+
313
+ # Check threshold
314
+ if matches >= rule["threshold"]:
315
+ issues.append(
316
+ {
317
+ "status": "error",
318
+ "message": rule["name"],
319
+ "details": {
320
+ "rule": rule["name"],
321
+ "matches": matches,
322
+ "threshold": rule["threshold"],
323
+ "window_minutes": rule["window_minutes"],
324
+ },
325
+ }
326
+ )
327
+
328
+ except Exception as e:
329
+ logger.error(f"Failed to check runtime log: {e}", exc_info=True)
330
+
331
+ return issues
332
+
333
+ def _check_gateway_log(self, now: datetime) -> Optional[Dict]:
334
+ """
335
+ Check gateway error log
336
+
337
+ Args:
338
+ now: Current datetime
339
+
340
+ Returns:
341
+ Optional[Dict]: Issue if detected, None otherwise
342
+ """
343
+ if not self.gateway_log_path.exists():
344
+ return None
345
+
346
+ try:
347
+ # Check if file was modified in the last minute
348
+ mtime = datetime.fromtimestamp(self.gateway_log_path.stat().st_mtime, tz=timezone.utc)
349
+ if mtime >= now - timedelta(minutes=1):
350
+ return {
351
+ "status": "error",
352
+ "message": "Daemon process error",
353
+ "details": {
354
+ "log_path": str(self.gateway_log_path),
355
+ "last_modified": mtime.isoformat(),
356
+ },
357
+ }
358
+ except Exception as e:
359
+ logger.error(f"Failed to check gateway log: {e}", exc_info=True)
360
+
361
+ return None
362
+
363
+ def _check_agent_logs(self, now: datetime) -> Optional[Dict]:
364
+ """
365
+ Check agent session logs
366
+
367
+ Args:
368
+ now: Current datetime
369
+
370
+ Returns:
371
+ Optional[Dict]: Issue if detected, None otherwise
372
+ """
373
+ if not self.agents_dir.exists():
374
+ return None
375
+
376
+ try:
377
+ # Find all JSONL files in sessions directories
378
+ session_files = []
379
+ for agent_dir in self.agents_dir.iterdir():
380
+ if not agent_dir.is_dir():
381
+ continue
382
+ sessions_dir = agent_dir / "sessions"
383
+ if not sessions_dir.exists():
384
+ continue
385
+
386
+ for jsonl_file in sessions_dir.glob("*.jsonl"):
387
+ # Check if file was modified in the last minute
388
+ try:
389
+ mtime = datetime.fromtimestamp(jsonl_file.stat().st_mtime, tz=timezone.utc)
390
+ if mtime >= now - timedelta(minutes=1):
391
+ session_files.append(jsonl_file)
392
+ except OSError:
393
+ continue
394
+
395
+ if not session_files:
396
+ return None
397
+
398
+ # Check logs from the past 5 minutes
399
+ window_start = now - timedelta(minutes=5)
400
+ total_lines = 0
401
+ error_lines = 0
402
+
403
+ for jsonl_file in session_files:
404
+ try:
405
+ # Read backwards from end of file, check lines from past 5 minutes
406
+ with file_read_backwards.FileReadBackwards(jsonl_file, encoding="utf-8") as f:
407
+ for line in f:
408
+ log_entry = self._parse_jsonl_line(line)
409
+ if not log_entry:
410
+ continue
411
+
412
+ # Get timestamp
413
+ timestamp = log_entry.get("timestamp")
414
+ if not timestamp:
415
+ continue
416
+
417
+ log_time = self._parse_timestamp(str(timestamp))
418
+ if not log_time:
419
+ continue
420
+
421
+ # If we've gone past the window start, we can break (reading backwards)
422
+ if log_time < window_start:
423
+ break
424
+
425
+ total_lines += 1
426
+
427
+ # Check if it's an error line
428
+ error_message = log_entry.get("errorMessage")
429
+ is_error = log_entry.get("isError", False)
430
+
431
+ if (error_message and error_message.strip()) or is_error:
432
+ error_lines += 1
433
+
434
+ except Exception as e:
435
+ logger.warning(f"Failed to read agent log {jsonl_file}: {e}")
436
+
437
+ # Check error rate
438
+ if total_lines > 0:
439
+ error_rate = error_lines / total_lines
440
+ if error_rate >= 0.3:
441
+ return {
442
+ "status": "error",
443
+ "message": "Agent failure rate too high",
444
+ "details": {
445
+ "error_rate": error_rate,
446
+ "error_lines": error_lines,
447
+ "total_lines": total_lines,
448
+ "checked_files": len(session_files),
449
+ },
450
+ }
451
+
452
+ except Exception as e:
453
+ logger.error(f"Failed to check agent logs: {e}", exc_info=True)
454
+
455
+ return None
456
+
457
+ async def check_all(self) -> HealthStatus:
458
+ """
459
+ Perform all OpenClaw checks
460
+
461
+ Returns:
462
+ HealthStatus: Overall health status with aggregated details
463
+ """
464
+ try:
465
+ all_details = {}
466
+ overall_status = "healthy"
467
+ messages = []
468
+
469
+ # Perform OpenClaw check
470
+ openclaw_status = await self.check_openclaw_status()
471
+ if openclaw_status:
472
+ all_details["openclaw"] = openclaw_status
473
+ if openclaw_status.get("status") != "healthy":
474
+ overall_status = openclaw_status.get("status", "warning")
475
+ messages.append(openclaw_status.get("message", ""))
476
+
477
+ # Determine overall message
478
+ if overall_status == "healthy":
479
+ message = "All checks passed"
480
+ else:
481
+ message = "; ".join(messages) if messages else "OpenClaw check issues detected"
482
+
483
+ return HealthStatus(
484
+ status=overall_status,
485
+ message=message,
486
+ details=all_details,
487
+ )
488
+
489
+ except Exception as e:
490
+ logger.error(f"OpenClaw check failed: {e}", exc_info=True)
491
+ return HealthStatus(
492
+ status="error",
493
+ message=f"OpenClaw check exception: {str(e)}",
494
+ details={},
495
+ )
496
+
497
+ async def check_openclaw_status(self) -> Optional[Dict]:
498
+ """
499
+ Check OpenClaw status (by reading files)
500
+
501
+ Returns:
502
+ Optional[Dict]: OpenClaw status, or None if check cannot be performed
503
+ """
504
+ try:
505
+ now = datetime.now(timezone.utc)
506
+ issues = []
507
+
508
+ # Check runtime log
509
+ runtime_issues = self._check_runtime_log(now)
510
+ issues.extend(runtime_issues)
511
+
512
+ # Check gateway log
513
+ gateway_issue = self._check_gateway_log(now)
514
+ if gateway_issue:
515
+ issues.append(gateway_issue)
516
+
517
+ # Check agent logs
518
+ agent_issue = self._check_agent_logs(now)
519
+ if agent_issue:
520
+ issues.append(agent_issue)
521
+
522
+ # Determine overall status
523
+ if issues:
524
+ # Get the most severe status
525
+ statuses = [issue.get("status", "warning") for issue in issues]
526
+ if "error" in statuses:
527
+ overall_status = "error"
528
+ else:
529
+ overall_status = "warning"
530
+
531
+ # Combine messages
532
+ messages = [issue.get("message", "") for issue in issues]
533
+ message = "; ".join(messages)
534
+
535
+ return {
536
+ "status": overall_status,
537
+ "message": message,
538
+ "details": {
539
+ "issues": issues,
540
+ "runtime_log_path": str(self._get_runtime_log_path()),
541
+ "gateway_log_path": str(self.gateway_log_path),
542
+ "agents_dir": str(self.agents_dir),
543
+ },
544
+ }
545
+ else:
546
+ return {
547
+ "status": "healthy",
548
+ "message": "All OpenClaw checks passed",
549
+ "details": {
550
+ "runtime_log_path": str(self._get_runtime_log_path()),
551
+ "gateway_log_path": str(self.gateway_log_path),
552
+ "agents_dir": str(self.agents_dir),
553
+ },
554
+ }
555
+
556
+ except Exception as e:
557
+ logger.error(f"OpenClaw status check failed: {e}", exc_info=True)
558
+ return {
559
+ "status": "error",
560
+ "message": f"OpenClaw status check failed: {str(e)}",
561
+ "details": {},
562
+ }
@@ -0,0 +1,50 @@
1
+ [
2
+ {
3
+ "pattern": "panic:|fatal|runtime error|segmentation fault|exit status",
4
+ "window_minutes": 1,
5
+ "threshold": 1,
6
+ "name": "Gateway process crash restart"
7
+ },
8
+ {
9
+ "pattern": "rpc server stopped|failed to serve rpc|listen tcp :4399",
10
+ "window_minutes": 1,
11
+ "threshold": 1,
12
+ "name": "RPC port disconnection"
13
+ },
14
+ {
15
+ "pattern": "all llm providers failed|All models failed|no available llm provider",
16
+ "window_minutes": 1,
17
+ "threshold": 1,
18
+ "name": "LLM model permanent failure"
19
+ },
20
+ {
21
+ "pattern": "rate limited|429 Too Many Requests|503 Service Unavailable",
22
+ "window_minutes": 1,
23
+ "threshold": 8,
24
+ "name": "Single model continuous 503/429"
25
+ },
26
+ {
27
+ "pattern": "tool execution timeout|context deadline exceeded.*tool",
28
+ "window_minutes": 2,
29
+ "threshold": 5,
30
+ "name": "Tool call continuous timeout"
31
+ },
32
+ {
33
+ "pattern": "created new page.*chrome",
34
+ "window_minutes": 5,
35
+ "threshold": 75,
36
+ "name": "Browser instance leak"
37
+ },
38
+ {
39
+ "pattern": "session ended unexpectedly|agent terminated by signal",
40
+ "window_minutes": 5,
41
+ "threshold": 5,
42
+ "name": "Agent session abnormal interruption"
43
+ },
44
+ {
45
+ "pattern": "tool.*failed permanently|(max retries exceeded.*(browser|google_search|fetch)|(browser|google_search|fetch).*max retries exceeded)",
46
+ "window_minutes": 10,
47
+ "threshold": 10,
48
+ "name": "Critical tool permanent failure"
49
+ }
50
+ ]
@@ -16,6 +16,8 @@ import aiohttp
16
16
 
17
17
  from ..operations import document
18
18
  from ..core.executor import CommandExecutor
19
+ from ..core.openclaw import OpenClawChecker
20
+ from ..core.config import get_config
19
21
  from ..mcp.manager import get_mcp_manager
20
22
  from ..security.whitelist import get_whitelist_file_path, is_command_allowed
21
23
 
@@ -55,6 +57,12 @@ class WebSocketClient:
55
57
  self.session: Optional[aiohttp.ClientSession] = None
56
58
  self.heartbeat_task: Optional[asyncio.Task] = None
57
59
  self.heartbeat_interval = 30.0 # 30seconds,Ensure enough heartbeats before server timeout
60
+ # OpenClaw check related
61
+ self.openclaw_checker = OpenClawChecker()
62
+ self.openclaw_check_enabled = get_config().get_openclaw_check_enabled()
63
+ self.openclaw_check_task: Optional[asyncio.Task] = None
64
+ self.openclaw_check_interval = 60.0 # OpenClaw check interval (seconds)
65
+ self.last_openclaw_status = None # Track last status to avoid duplicate notifications
58
66
 
59
67
  def _get_ws_url(self) -> str:
60
68
  """Get WebSocket URL"""
@@ -333,6 +341,119 @@ class WebSocketClient:
333
341
 
334
342
  logger.info(f"[CLIENT-HEARTBEAT-STOP] Heartbeat task stopped: client_id={self.client_id}")
335
343
 
344
+ async def _perform_openclaw_check(self) -> dict:
345
+ """
346
+ Perform OpenClaw check
347
+
348
+ Returns:
349
+ dict: OpenClaw status with the following structure:
350
+ {
351
+ "status": "healthy|warning|error",
352
+ "message": "Status description",
353
+ "details": {
354
+ # Check-specific parameters from OpenClawChecker
355
+ }
356
+ }
357
+ """
358
+ openclaw_status = await self.openclaw_checker.check_all()
359
+ return openclaw_status.to_dict()
360
+
361
+ async def _openclaw_check_loop(self):
362
+ """OpenClaw check loop - independent coroutine that sends notifications when issues are detected"""
363
+ logger.info(
364
+ f"[CLIENT-OPENCLAW-CHECK-START] OpenClaw check task started: "
365
+ f"interval={self.openclaw_check_interval}s, client_id={self.client_id}"
366
+ )
367
+
368
+ while self.running and self.ws and not self.ws.closed:
369
+ try:
370
+ # Perform OpenClaw check
371
+ status = await self._perform_openclaw_check()
372
+
373
+ # If issues detected and status changed, send notification
374
+ if status.get("status") != "healthy":
375
+ # Check if status changed (avoid duplicate notifications)
376
+ status_key = (status.get("status"), status.get("message"))
377
+ if status_key != self.last_openclaw_status:
378
+ await self._send_openclaw_notification(status)
379
+ self.last_openclaw_status = status_key
380
+ else:
381
+ # When recovered, if there was a previous issue, also send a notification
382
+ if self.last_openclaw_status is not None:
383
+ await self._send_openclaw_notification(status)
384
+ self.last_openclaw_status = None
385
+
386
+ # Wait for next check
387
+ await asyncio.sleep(self.openclaw_check_interval)
388
+
389
+ except Exception as e:
390
+ logger.error(
391
+ f"[CLIENT-OPENCLAW-CHECK-ERROR] OpenClaw check error: {e}, "
392
+ f"client_id={self.client_id}",
393
+ exc_info=True
394
+ )
395
+ # Continue running even if check fails
396
+ await asyncio.sleep(self.openclaw_check_interval)
397
+
398
+ logger.info(
399
+ f"[CLIENT-OPENCLAW-CHECK-STOP] OpenClaw check task stopped: "
400
+ f"client_id={self.client_id}"
401
+ )
402
+
403
+ async def _send_openclaw_notification(self, status: dict):
404
+ """
405
+ Send OpenClaw status notification message
406
+
407
+ Args:
408
+ status: OpenClaw status dictionary with structure:
409
+ {
410
+ "status": "healthy|warning|error",
411
+ "message": "Status description",
412
+ "details": {
413
+ # Any check-specific parameters
414
+ }
415
+ }
416
+ """
417
+ if not self.ws or self.ws.closed:
418
+ logger.warning(
419
+ f"[CLIENT-OPENCLAW-NOTIFY] WebSocket not connected, skipping notification: "
420
+ f"client_id={self.client_id}"
421
+ )
422
+ return
423
+
424
+ try:
425
+ notification_msg = {
426
+ "id": str(uuid.uuid4()),
427
+ "type": "health_status", # Keep "health_status" for protocol compatibility
428
+ "timestamp": int(time.time() * 1000),
429
+ "data": {
430
+ "status": status.get("status"),
431
+ "message": status.get("message"),
432
+ "details": status.get("details", {}),
433
+ }
434
+ }
435
+
436
+ logger.info(
437
+ f"[CLIENT-OPENCLAW-NOTIFY] Sending OpenClaw notification: "
438
+ f"status={status.get('status')}, message_id={notification_msg['id']}, "
439
+ f"client_id={self.client_id}"
440
+ )
441
+
442
+ # Send message directly
443
+ await self.ws.send_json(notification_msg)
444
+
445
+ logger.debug(
446
+ f"[CLIENT-OPENCLAW-NOTIFY] OpenClaw notification sent: "
447
+ f"message_id={notification_msg['id']}, client_id={self.client_id}"
448
+ )
449
+
450
+ except Exception as e:
451
+ logger.error(
452
+ f"[CLIENT-OPENCLAW-NOTIFY-ERROR] Failed to send OpenClaw notification: "
453
+ f"error={e}, client_id={self.client_id}",
454
+ exc_info=True
455
+ )
456
+
336
457
  async def _execute_local_command(
337
458
  self,
338
459
  message_id: str,
@@ -1029,6 +1150,19 @@ class WebSocketClient:
1029
1150
  f"client_id={self.client_id}"
1030
1151
  )
1031
1152
 
1153
+ # Start OpenClaw check task (if enabled)
1154
+ if self.openclaw_check_enabled:
1155
+ self.openclaw_check_task = asyncio.create_task(self._openclaw_check_loop())
1156
+ logger.info(
1157
+ f"[CLIENT-OPENCLAW-CHECK-TASK] OpenClaw check task started: "
1158
+ f"interval={self.openclaw_check_interval}s, client_id={self.client_id}"
1159
+ )
1160
+ else:
1161
+ logger.debug(
1162
+ f"[CLIENT-OPENCLAW-CHECK-TASK] OpenClaw check disabled, skipping task start: "
1163
+ f"client_id={self.client_id}"
1164
+ )
1165
+
1032
1166
  return True
1033
1167
 
1034
1168
  except Exception as e:
@@ -1044,6 +1178,22 @@ class WebSocketClient:
1044
1178
  """Disconnect WebSocket connection"""
1045
1179
  logger.info(f"[CLIENT-DISCONNECT-START] Starting disconnect process: client_id={self.client_id}")
1046
1180
 
1181
+ # Stop OpenClaw check task
1182
+ if self.openclaw_check_task and not self.openclaw_check_task.done():
1183
+ logger.debug(
1184
+ f"[CLIENT-DISCONNECT-OPENCLAW-CHECK] Cancelling OpenClaw check task: "
1185
+ f"client_id={self.client_id}"
1186
+ )
1187
+ self.openclaw_check_task.cancel()
1188
+ try:
1189
+ await self.openclaw_check_task
1190
+ except asyncio.CancelledError:
1191
+ pass
1192
+ logger.info(
1193
+ f"[CLIENT-DISCONNECT-OPENCLAW-CHECK] OpenClaw check task stopped: "
1194
+ f"client_id={self.client_id}"
1195
+ )
1196
+
1047
1197
  # Stop heartbeat task
1048
1198
  if self.heartbeat_task and not self.heartbeat_task.done():
1049
1199
  logger.debug(f"[CLIENT-DISCONNECT-HEARTBEAT] Cancelling heartbeat task: client_id={self.client_id}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: castrel-proxy
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: A lightweight remote command execution bridge client with MCP integration
5
5
  Project-URL: Homepage, https://github.com/castrel-ai/castrel-proxy
6
6
  Project-URL: Documentation, https://github.com/castrel-ai/castrel-proxy#readme
@@ -44,6 +44,7 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
44
44
  Classifier: Topic :: System :: Networking
45
45
  Requires-Python: >=3.10
46
46
  Requires-Dist: aiohttp>=3.9.0
47
+ Requires-Dist: file-read-backwards>=2.0.0
47
48
  Requires-Dist: langchain-mcp-adapters>=0.2.1
48
49
  Requires-Dist: mcp>=1.0.0
49
50
  Requires-Dist: pyyaml>=6.0.1
@@ -1,24 +1,26 @@
1
1
  castrel_proxy/__init__.py,sha256=toQT6mtxjfUBSr2T7ajweV6APYrExW0_ykD8H28SW1U,550
2
2
  castrel_proxy/cli/__init__.py,sha256=qc0mPeDSpBEb8cT0MAMJTAu2NPouLI96XXtKiuFRXVk,112
3
- castrel_proxy/cli/commands.py,sha256=B0v8EY2gU8C8ln2ff7vKCg7CgqE-9DSuzTEp8zAVHp4,21840
4
- castrel_proxy/core/__init__.py,sha256=ID7EHJ0ILIGZYUJPWoYfyJIjXWTcu9EsaA4h7IsExLw,479
3
+ castrel_proxy/cli/commands.py,sha256=FcqUjRossMQG95fLW6S0J-Jec_1W4QafV8Jt6OR7AT4,21841
4
+ castrel_proxy/core/__init__.py,sha256=Mbxkrhpl3-k73GZGpUxu0W_XQ7upGTKmUfzjMFCDLrI,574
5
5
  castrel_proxy/core/client_id.py,sha256=uU3YF2kxyFHfysMkRRjR13Ic-F5nl9fAa4vUyTui5fU,2364
6
- castrel_proxy/core/config.py,sha256=SoxVyRs4K4iWBmDq2EgA9goeTdLerfturb0ZnfORHcw,4668
6
+ castrel_proxy/core/config.py,sha256=NMOFPY95srMvaCM6DDZKZ4bZdCkGZjR2PQUmenB0VAY,8642
7
7
  castrel_proxy/core/daemon.py,sha256=y0m7KIW2iSHcdCvIXAg_pNx5IbVZ3Aa1O6XGwyENnVY,5487
8
8
  castrel_proxy/core/executor.py,sha256=Z1TBW6rRJMDuls0dF0_W32OuOPaZNmlB4DpCC55IJJ0,5513
9
+ castrel_proxy/core/openclaw.py,sha256=467AHzF7rOVU4SJqseOhZrU2lL9bui7IaWJ5cKV2Zl8,20789
9
10
  castrel_proxy/data/__init__.py,sha256=NTWvSsQ2vYDYvJV4Xng2q2q6Qyj1Z4VikLeQPlOre4o,42
10
11
  castrel_proxy/data/default_whitelist.txt,sha256=tkjvX6Q-kXzy8bgkPssWzBbakDJvUZL7BQbIDDj7XbM,2135
12
+ castrel_proxy/data/openclaw_runtime_log_rules.json,sha256=liSKU5ftGX2oeHhT2P7Hh0ZA8mE8GyzrdQCMtGsFuCQ,1433
11
13
  castrel_proxy/mcp/__init__.py,sha256=aPUkCh4im-MlU3byXn_nXp9k85MUY1wfFLvDwH7eDjM,161
12
14
  castrel_proxy/mcp/manager.py,sha256=epxT4zDMGmgzPzV7F3swM6ExMFlbx_fz7OIqsmgXZsY,9506
13
15
  castrel_proxy/network/__init__.py,sha256=C7PPMqB44X8yMSNMv8vuILPI26xCfLf5levuzIrjdXk,329
14
16
  castrel_proxy/network/api_client.py,sha256=eMpRTlnvphakTgufqxmLFGr5PLEPTR9FtZ1oBrZrQ68,9898
15
- castrel_proxy/network/websocket_client.py,sha256=-PJ25FVDcEaWod7pJjSQzjoTQOygVcw5buO8g8xUyKo,43470
17
+ castrel_proxy/network/websocket_client.py,sha256=eMb5VN0MzSkxWmpFJeEjW_LLtNxT7OVF2U62_DLLpZc,49755
16
18
  castrel_proxy/operations/__init__.py,sha256=NGv4Z-8Fek9KiDv630CmekWvbC2n-BKlc2yqaUHSqZc,317
17
19
  castrel_proxy/operations/document.py,sha256=UnybzI3FCOVteMHkd88JaR6WvU2YS1e9Rf45wTttGss,10673
18
20
  castrel_proxy/security/__init__.py,sha256=aC0CcnM4mfuuXfYJniMbn47JN1eQUXU-k3ByqfWuK4s,352
19
21
  castrel_proxy/security/whitelist.py,sha256=nU5_ovpGZOgZfhMofoxt9ImxJR8boYdk937Tz-K34C4,11493
20
- castrel_proxy-0.1.2.dist-info/METADATA,sha256=RtmzYM-E2d3CeDerhRqjgnSRbPyzc1jRr4Oj7EGaxWI,10041
21
- castrel_proxy-0.1.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
22
- castrel_proxy-0.1.2.dist-info/entry_points.txt,sha256=5X5vHpAFgIFpvt2CmI5U0d_naKA64eE-IZKEqvnS7VE,65
23
- castrel_proxy-0.1.2.dist-info/licenses/LICENSE,sha256=cqP1Kg_ECZMt4RPE8h5uL4-qpEaYTgca7r4aHtMIDGk,1066
24
- castrel_proxy-0.1.2.dist-info/RECORD,,
22
+ castrel_proxy-0.1.4.dist-info/METADATA,sha256=pvrDxc_HgB6rlMwG7V8pGB7WrMViSUdKEvhXIxa5hxw,10083
23
+ castrel_proxy-0.1.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
24
+ castrel_proxy-0.1.4.dist-info/entry_points.txt,sha256=5X5vHpAFgIFpvt2CmI5U0d_naKA64eE-IZKEqvnS7VE,65
25
+ castrel_proxy-0.1.4.dist-info/licenses/LICENSE,sha256=cqP1Kg_ECZMt4RPE8h5uL4-qpEaYTgca7r4aHtMIDGk,1066
26
+ castrel_proxy-0.1.4.dist-info/RECORD,,