loki-mode 5.53.0 → 5.55.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,231 @@
1
+ """
2
+ Activity Logger for Loki Mode Dashboard.
3
+
4
+ Appends structured JSONL entries to ~/.loki/activity.jsonl with automatic
5
+ rotation at 10MB. Provides query and session-diff capabilities.
6
+ """
7
+
8
+ import json
9
+ import logging
10
+ import os
11
+ import threading
12
+ from datetime import datetime, timedelta, timezone
13
+ from pathlib import Path
14
+ from typing import Any, Optional
15
+
16
+ logger = logging.getLogger("loki-activity")
17
+
18
+ LOKI_DATA_DIR = os.environ.get("LOKI_DATA_DIR", os.path.expanduser("~/.loki"))
19
+
20
+ # Valid entity types and actions for validation
21
+ VALID_ENTITY_TYPES = {"task", "agent", "phase", "checkpoint"}
22
+ VALID_ACTIONS = {"created", "status_changed", "completed", "failed", "blocked"}
23
+
24
+ # Rotation threshold in bytes (10MB)
25
+ MAX_FILE_SIZE = 10 * 1024 * 1024
26
+
27
+
28
+ class ActivityLogger:
29
+ """Thread-safe activity logger that writes JSONL to ~/.loki/activity.jsonl."""
30
+
31
+ def __init__(self, data_dir: Optional[str] = None) -> None:
32
+ self._data_dir = Path(data_dir or LOKI_DATA_DIR)
33
+ self._log_file = self._data_dir / "activity.jsonl"
34
+ self._lock = threading.Lock()
35
+ self._data_dir.mkdir(parents=True, exist_ok=True)
36
+
37
+ @property
38
+ def log_file(self) -> Path:
39
+ """Return the path to the current activity log file."""
40
+ return self._log_file
41
+
42
+ def _rotate_if_needed(self) -> None:
43
+ """Rotate the log file if it exceeds MAX_FILE_SIZE."""
44
+ try:
45
+ if self._log_file.exists() and self._log_file.stat().st_size >= MAX_FILE_SIZE:
46
+ timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
47
+ rotated = self._data_dir / f"activity-{timestamp}.jsonl"
48
+ self._log_file.rename(rotated)
49
+ logger.info("Rotated activity log to %s", rotated)
50
+ except OSError as e:
51
+ logger.warning("Failed to rotate activity log: %s", e)
52
+
53
+ def log(
54
+ self,
55
+ entity_type: str,
56
+ entity_id: str,
57
+ action: str,
58
+ old_value: Optional[str] = None,
59
+ new_value: Optional[str] = None,
60
+ session_id: Optional[str] = None,
61
+ ) -> dict[str, Any]:
62
+ """Log an activity entry. Returns the entry dict."""
63
+ if entity_type not in VALID_ENTITY_TYPES:
64
+ logger.warning("Invalid entity_type %r (valid: %s)", entity_type, VALID_ENTITY_TYPES)
65
+ if action not in VALID_ACTIONS:
66
+ logger.warning("Invalid action %r (valid: %s)", action, VALID_ACTIONS)
67
+
68
+ entry: dict[str, Any] = {
69
+ "timestamp": datetime.now(timezone.utc).isoformat(),
70
+ "entity_type": entity_type,
71
+ "entity_id": entity_id,
72
+ "action": action,
73
+ "old_value": old_value,
74
+ "new_value": new_value,
75
+ "session_id": session_id,
76
+ }
77
+
78
+ with self._lock:
79
+ self._rotate_if_needed()
80
+ try:
81
+ with open(self._log_file, "a", encoding="utf-8") as f:
82
+ f.write(json.dumps(entry, separators=(",", ":")) + "\n")
83
+ except OSError as e:
84
+ logger.error("Failed to write activity entry: %s", e)
85
+
86
+ return entry
87
+
88
+ def query_since(self, timestamp: str) -> list[dict[str, Any]]:
89
+ """Return activity entries after the given ISO timestamp."""
90
+ # Normalize Z-suffix so comparisons work consistently
91
+ timestamp = timestamp.replace("Z", "+00:00")
92
+ results: list[dict[str, Any]] = []
93
+
94
+ if not self._log_file.exists():
95
+ return results
96
+
97
+ with self._lock:
98
+ try:
99
+ with open(self._log_file, "r", encoding="utf-8") as f:
100
+ for line in f:
101
+ line = line.strip()
102
+ if not line:
103
+ continue
104
+ try:
105
+ entry = json.loads(line)
106
+ entry_ts = entry.get("timestamp", "").replace("Z", "+00:00")
107
+ if entry_ts > timestamp:
108
+ results.append(entry)
109
+ except json.JSONDecodeError:
110
+ continue
111
+ except OSError as e:
112
+ logger.error("Failed to read activity log: %s", e)
113
+
114
+ return results
115
+
116
+ def get_session_diff(self, since_timestamp: Optional[str] = None) -> dict[str, Any]:
117
+ """Return a structured summary of activity since the given timestamp.
118
+
119
+ If no timestamp is provided, defaults to the last 24 hours.
120
+ """
121
+ if since_timestamp is None:
122
+ since_dt = datetime.now(timezone.utc) - timedelta(hours=24)
123
+ since_timestamp = since_dt.isoformat()
124
+
125
+ # Normalize Z-suffix before passing to query_since
126
+ since_timestamp = since_timestamp.replace("Z", "+00:00")
127
+
128
+ entries = self.query_since(since_timestamp)
129
+
130
+ now = datetime.now(timezone.utc)
131
+ try:
132
+ since_dt = datetime.fromisoformat(since_timestamp.replace("Z", "+00:00"))
133
+ except (ValueError, AttributeError):
134
+ since_dt = now
135
+
136
+ period_hours = max(0.0, (now - since_dt).total_seconds() / 3600)
137
+
138
+ # Build summary counts
139
+ summary = {
140
+ "total_changes": len(entries),
141
+ "tasks_created": 0,
142
+ "tasks_completed": 0,
143
+ "tasks_blocked": 0,
144
+ "phases_transitioned": 0,
145
+ "checkpoints_created": 0,
146
+ "errors": 0,
147
+ }
148
+
149
+ highlights: list[str] = []
150
+ decisions: list[dict[str, str]] = []
151
+
152
+ for entry in entries:
153
+ entity_type = entry.get("entity_type", "")
154
+ action = entry.get("action", "")
155
+ entity_id = entry.get("entity_id", "")
156
+
157
+ if entity_type == "task":
158
+ if action == "created":
159
+ summary["tasks_created"] += 1
160
+ highlights.append(f"Task {entity_id} created")
161
+ elif action == "completed":
162
+ summary["tasks_completed"] += 1
163
+ highlights.append(f"Task {entity_id} completed")
164
+ elif action == "blocked":
165
+ summary["tasks_blocked"] += 1
166
+ highlights.append(f"Task {entity_id} blocked")
167
+ elif action == "failed":
168
+ summary["errors"] += 1
169
+ highlights.append(f"Task {entity_id} failed")
170
+ elif action == "status_changed":
171
+ old_val = entry.get("old_value", "")
172
+ new_val = entry.get("new_value", "")
173
+ highlights.append(f"Task {entity_id}: {old_val} -> {new_val}")
174
+
175
+ elif entity_type == "agent":
176
+ if action == "failed":
177
+ summary["errors"] += 1
178
+ highlights.append(f"Agent {entity_id} failed")
179
+ elif action == "created":
180
+ highlights.append(f"Agent {entity_id} created")
181
+ elif action == "status_changed":
182
+ old_val = entry.get("old_value", "")
183
+ new_val = entry.get("new_value", "")
184
+ highlights.append(f"Agent {entity_id}: {old_val} -> {new_val}")
185
+ # Agent status changes may represent decisions
186
+ if new_val:
187
+ decisions.append({
188
+ "timestamp": entry.get("timestamp", ""),
189
+ "decision": f"Agent {entity_id} transitioned to {new_val}",
190
+ "reasoning": f"Status changed from {old_val} to {new_val}",
191
+ })
192
+
193
+ elif entity_type == "phase":
194
+ if action == "status_changed":
195
+ summary["phases_transitioned"] += 1
196
+ old_val = entry.get("old_value", "")
197
+ new_val = entry.get("new_value", "")
198
+ highlights.append(f"Phase transition: {old_val} -> {new_val}")
199
+ decisions.append({
200
+ "timestamp": entry.get("timestamp", ""),
201
+ "decision": f"Phase transitioned to {new_val}",
202
+ "reasoning": f"Moved from {old_val} to {new_val}",
203
+ })
204
+
205
+ elif entity_type == "checkpoint":
206
+ if action == "created":
207
+ summary["checkpoints_created"] += 1
208
+ highlights.append(f"Checkpoint {entity_id} created")
209
+
210
+ return {
211
+ "since": since_timestamp,
212
+ "period_hours": round(period_hours, 2),
213
+ "summary": summary,
214
+ "highlights": highlights,
215
+ "decisions": decisions,
216
+ }
217
+
218
+
219
+ # Singleton instance
220
+ _instance: Optional[ActivityLogger] = None
221
+ _instance_lock = threading.Lock()
222
+
223
+
224
def get_activity_logger(data_dir: Optional[str] = None) -> ActivityLogger:
    """Return the shared ActivityLogger, building it on first use.

    ``data_dir`` is only honoured by the call that actually creates the
    instance; later calls return the existing logger unchanged.
    """
    global _instance

    # Fast path: once the instance exists no locking is needed.
    if _instance is not None:
        return _instance

    with _instance_lock:
        # Re-check under the lock: another thread may have created the
        # instance while this one was waiting.
        if _instance is None:
            _instance = ActivityLogger(data_dir=data_dir)
        return _instance
@@ -0,0 +1,228 @@
1
+ """
2
+ Failure Extractor for Loki Mode.
3
+
4
+ Parses session JSONL log files to identify failure patterns such as
5
+ repeated task failures, excessive RARV cycles, verification failures,
6
+ agent timeouts, and user corrections.
7
+ """
8
+
9
+ import hashlib
10
+ import json
11
+ import logging
12
+ import os
13
+ from collections import defaultdict
14
+ from pathlib import Path
15
+ from typing import Any
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ LOKI_DATA_DIR = os.environ.get("LOKI_DATA_DIR", os.path.expanduser("~/.loki"))
20
+
21
+ # Keywords/fields used to classify failure categories
22
+ _TIMEOUT_KEYWORDS = ("timeout", "timed out", "deadline exceeded", "killed")
23
+ _VERIFICATION_KEYWORDS = ("verification failed", "verify failed", "assertion", "expect")
24
+ _RETRY_KEYWORDS = ("retry", "retrying", "attempt ", "reattempt")
25
+
26
+
27
def _hash_pattern(text: str) -> str:
    """Return the first 12 hex characters of the SHA-256 digest of *text*.

    The digest is computed over the UTF-8 encoding of the text, so equal
    strings always map to the same short key.
    """
    hasher = hashlib.sha256()
    hasher.update(text.encode("utf-8"))
    full_digest = hasher.hexdigest()
    return full_digest[:12]
30
+
31
+
32
def _normalize_error(message: str) -> str:
    """Strip variable parts of an error message so similar errors group together.

    Replacements are applied in this order:

    1. hyphenated UUIDs                      -> ``<ID>``
    2. runs of 8+ lowercase hex characters   -> ``<ID>``
    3. ``YYYY-MM-DD[T ]HH:MM:SS...`` stamps  -> ``<TS>``
    4. ``:line:col`` suffixes                -> ``:<LINE>``
    5. any remaining digit runs              -> ``<N>``

    Args:
        message: Raw error text.

    Returns:
        The normalized text with surrounding whitespace removed.
    """
    import re

    # UUIDs must be collapsed as a whole first: their 4-character groups are
    # too short for the generic hex rule, which would otherwise leave
    # value-dependent fragments such as "e<N>b" behind.
    normalized = re.sub(
        r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}",
        "<ID>",
        message,
    )
    # Remove hex IDs, timestamps, line numbers
    normalized = re.sub(r"[0-9a-f]{8,}", "<ID>", normalized)
    normalized = re.sub(r"\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}[^\s]*", "<TS>", normalized)
    normalized = re.sub(r":\d+:\d+", ":<LINE>", normalized)
    normalized = re.sub(r"\d+", "<N>", normalized)
    return normalized.strip()
41
+
42
+
43
def _classify_failure(entry: dict[str, Any]) -> str | None:
    """Map a log entry to a failure category, or ``None`` when it is not a failure.

    Checks run in priority order and the first match wins: ``"timeout"``,
    ``"verification"``, ``"retry"`` (keyword/event match, or an iteration
    count above 2), then ``"error"`` for generic failures and user
    corrections.
    """
    text = str(entry.get("message", "") or entry.get("error", "")).lower()
    kind = str(entry.get("type", "") or entry.get("event", "")).lower()
    state = str(entry.get("status", "")).lower()

    def mentions(keywords: tuple[str, ...]) -> bool:
        # True when any keyword occurs as a substring of the lowercased message.
        return any(kw in text for kw in keywords)

    # Agent timeout
    if mentions(_TIMEOUT_KEYWORDS) or kind == "timeout":
        return "timeout"

    # Verification failure
    if mentions(_VERIFICATION_KEYWORDS) or kind == "verification_failed":
        return "verification"

    # Retry / repeated failure
    if mentions(_RETRY_KEYWORDS) or kind in ("retry", "task_retry"):
        return "retry"

    # RARV cycle excess (iteration count > 2); non-numeric counts are ignored
    cycle = entry.get("iteration") or entry.get("rarv_iteration")
    if cycle is not None:
        try:
            too_many = int(cycle) > 2
        except (ValueError, TypeError):
            too_many = False
        if too_many:
            return "retry"

    # Generic errors and user corrections both fall into the "error" bucket
    generic_error = state in ("failed", "error") or kind in ("error", "failure", "task_failed")
    user_correction = kind in ("user_correction", "correction", "override")
    return "error" if (generic_error or user_correction) else None
79
+
80
+
81
+ class FailureExtractor:
82
+ """Extracts and groups failure patterns from Loki session logs."""
83
+
84
+ def __init__(self, data_dir: str | None = None) -> None:
85
+ self._data_dir = Path(data_dir) if data_dir else Path(LOKI_DATA_DIR)
86
+
87
+ def _log_paths(self) -> list[Path]:
88
+ """Return all candidate JSONL log files, newest first."""
89
+ paths: list[Path] = []
90
+
91
+ # Activity log
92
+ activity = self._data_dir / "activity.jsonl"
93
+ if activity.is_file():
94
+ paths.append(activity)
95
+
96
+ # Session logs directory
97
+ logs_dir = self._data_dir / "logs"
98
+ if logs_dir.is_dir():
99
+ session_files = sorted(logs_dir.glob("*.jsonl"), reverse=True)
100
+ paths.extend(session_files)
101
+
102
+ return paths
103
+
104
+ def _parse_jsonl(self, path: Path, max_lines: int = 50000) -> list[dict[str, Any]]:
105
+ """Parse a JSONL file, returning list of dicts. Skips malformed lines."""
106
+ entries: list[dict[str, Any]] = []
107
+ try:
108
+ with open(path, "r", encoding="utf-8", errors="replace") as fh:
109
+ for i, line in enumerate(fh):
110
+ if i >= max_lines:
111
+ break
112
+ line = line.strip()
113
+ if not line:
114
+ continue
115
+ try:
116
+ entries.append(json.loads(line))
117
+ except json.JSONDecodeError:
118
+ continue
119
+ except OSError as exc:
120
+ logger.warning("Failed to read log file %s: %s", path, exc)
121
+ return entries
122
+
123
+ def _extract_session_id(self, entry: dict[str, Any], fallback: str) -> str:
124
+ """Get session ID from a log entry."""
125
+ return str(entry.get("session_id") or entry.get("session") or fallback)
126
+
127
+ def extract(self, sessions: int = 10) -> dict[str, Any]:
128
+ """Extract failure patterns from the most recent N sessions.
129
+
130
+ Args:
131
+ sessions: Number of recent sessions to analyze.
132
+
133
+ Returns:
134
+ Structured failure data with patterns grouped by similarity.
135
+ """
136
+ log_paths = self._log_paths()
137
+ if not log_paths:
138
+ return {
139
+ "session_count": 0,
140
+ "total_failures": 0,
141
+ "patterns": [],
142
+ }
143
+
144
+ # Collect all failure entries across log files
145
+ # Track sessions seen to respect the session limit
146
+ failures: list[dict[str, Any]] = []
147
+ seen_sessions: set[str] = set()
148
+ session_order: list[str] = []
149
+
150
+ for path in log_paths:
151
+ entries = self._parse_jsonl(path)
152
+ file_session = path.stem # use filename as fallback session ID
153
+
154
+ for entry in entries:
155
+ sid = self._extract_session_id(entry, file_session)
156
+
157
+ # Track session ordering
158
+ if sid not in seen_sessions:
159
+ seen_sessions.add(sid)
160
+ session_order.append(sid)
161
+
162
+ category = _classify_failure(entry)
163
+ if category is not None:
164
+ failures.append({
165
+ "entry": entry,
166
+ "category": category,
167
+ "session_id": sid,
168
+ })
169
+
170
+ # Limit to the most recent N sessions
171
+ recent_sessions = set(session_order[:sessions])
172
+ failures = [f for f in failures if f["session_id"] in recent_sessions]
173
+
174
+ # Group failures by normalized error message + category
175
+ groups: dict[str, dict[str, Any]] = defaultdict(lambda: {
176
+ "category": "",
177
+ "count": 0,
178
+ "agent_types": set(),
179
+ "phases": set(),
180
+ "representative_error": "",
181
+ "sessions": set(),
182
+ })
183
+
184
+ for failure in failures:
185
+ entry = failure["entry"]
186
+ category = failure["category"]
187
+ session_id = failure["session_id"]
188
+
189
+ raw_message = str(
190
+ entry.get("message") or entry.get("error") or entry.get("detail") or ""
191
+ )
192
+ normalized = _normalize_error(raw_message)
193
+ group_key = f"{category}:{_hash_pattern(normalized)}"
194
+
195
+ group = groups[group_key]
196
+ group["category"] = category
197
+ group["count"] += 1
198
+ group["sessions"].add(session_id)
199
+
200
+ if not group["representative_error"]:
201
+ group["representative_error"] = raw_message[:500]
202
+
203
+ agent_type = entry.get("agent_type") or entry.get("agent") or ""
204
+ if agent_type:
205
+ group["agent_types"].add(str(agent_type))
206
+
207
+ phase = entry.get("phase") or entry.get("stage") or ""
208
+ if phase:
209
+ group["phases"].add(str(phase))
210
+
211
+ # Build structured output
212
+ patterns = []
213
+ for group_key, group in sorted(groups.items(), key=lambda x: x[1]["count"], reverse=True):
214
+ patterns.append({
215
+ "pattern_id": _hash_pattern(group_key),
216
+ "category": group["category"],
217
+ "count": group["count"],
218
+ "agent_types": sorted(group["agent_types"]),
219
+ "phases": sorted(group["phases"]),
220
+ "representative_error": group["representative_error"],
221
+ "sessions": sorted(group["sessions"]),
222
+ })
223
+
224
+ return {
225
+ "session_count": len(recent_sessions),
226
+ "total_failures": sum(p["count"] for p in patterns),
227
+ "patterns": patterns,
228
+ }