loki-mode 5.53.0 → 5.55.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,281 @@
1
+ """
2
+ Prompt Optimizer for Loki Mode.
3
+
4
+ Uses failure patterns from FailureExtractor to generate improved prompt
5
+ sections for agents. Stores versioned prompts with change tracking.
6
+ """
7
+
8
+ import hashlib
9
+ import json
10
+ import logging
11
+ import os
12
+ import tempfile
13
+ import threading
14
+ from datetime import datetime, timezone
15
+ from pathlib import Path
16
+ from typing import Any
17
+
18
+ from .failure_extractor import FailureExtractor
19
+
20
+ logger = logging.getLogger(__name__)  # module-level logger named after this module
21
+
22
+ LOKI_DATA_DIR = os.environ.get("LOKI_DATA_DIR", os.path.expanduser("~/.loki"))  # env override, else ~/.loki under the user's home
23
+
24
+
25
+ def _content_hash(text: str) -> str:
26
+ """Generate a short hash of prompt content for change tracking."""
27
+ return hashlib.sha256(text.encode("utf-8")).hexdigest()[:12]
28
+
29
+
30
+ def _timestamp_iso() -> str:
31
+ """Return current UTC timestamp in ISO format."""
32
+ return datetime.now(timezone.utc).isoformat()
33
+
34
+
35
+ class PromptOptimizer:
36
+ """Generates and manages versioned prompt optimizations based on failure analysis."""
37
+
38
+ def __init__(self, data_dir: str | None = None) -> None:
39
+ self._data_dir = Path(data_dir) if data_dir else Path(LOKI_DATA_DIR)
40
+ self._prompts_dir = self._data_dir / "prompts" / "optimized"
41
+ self._extractor = FailureExtractor(data_dir=str(self._data_dir))
42
+ self._file_lock = threading.Lock()
43
+
44
+ def _ensure_dirs(self) -> None:
45
+ """Create prompt storage directories if they do not exist."""
46
+ self._prompts_dir.mkdir(parents=True, exist_ok=True)
47
+
48
+ def _version_file(self, version: int) -> Path:
49
+ """Return the path for a specific version file."""
50
+ return self._prompts_dir / f"v{version:04d}.json"
51
+
52
+ def _latest_file(self) -> Path:
53
+ """Return path to the latest.json symlink/file."""
54
+ return self._prompts_dir / "latest.json"
55
+
56
+ def get_current_version(self) -> dict[str, Any]:
57
+ """Get the current (latest) prompt optimization version.
58
+
59
+ Returns:
60
+ The latest version data, or a default structure if none exists.
61
+ """
62
+ latest = self._latest_file()
63
+ if latest.is_file():
64
+ try:
65
+ return json.loads(latest.read_text(encoding="utf-8"))
66
+ except (json.JSONDecodeError, OSError) as exc:
67
+ logger.warning("Failed to read latest prompt version: %s", exc)
68
+
69
+ return {
70
+ "version": 0,
71
+ "generated_at": None,
72
+ "based_on_sessions": 0,
73
+ "failures_analyzed": 0,
74
+ "changes": [],
75
+ }
76
+
77
+ def get_prompt_for_agent(self, agent_type: str) -> dict[str, Any] | None:
78
+ """Get the current optimized prompt section for a specific agent type.
79
+
80
+ Supports hot-reload by reading from disk on each call.
81
+
82
+ Args:
83
+ agent_type: The agent type to retrieve prompt for (e.g. "test_engineer").
84
+
85
+ Returns:
86
+ Dict with agent-specific prompt changes, or None if no optimizations exist.
87
+ """
88
+ current = self.get_current_version()
89
+ if current["version"] == 0:
90
+ return None
91
+
92
+ agent_changes = [c for c in current.get("changes", []) if c.get("agent_type") == agent_type]
93
+ if not agent_changes:
94
+ return None
95
+
96
+ return {
97
+ "agent_type": agent_type,
98
+ "version": current["version"],
99
+ "generated_at": current["generated_at"],
100
+ "changes": agent_changes,
101
+ }
102
+
103
+ def _generate_changes(self, failure_data: dict[str, Any]) -> list[dict[str, Any]]:
104
+ """Generate prompt change suggestions from failure patterns.
105
+
106
+ TODO: Integrate actual LLM-as-judge call here. Currently generates
107
+ structured change suggestions based on pattern heuristics. Replace
108
+ the heuristic logic below with an LLM call that analyzes failures
109
+ and produces improved prompt sections.
110
+
111
+ Args:
112
+ failure_data: Output from FailureExtractor.extract().
113
+
114
+ Returns:
115
+ List of change dicts with agent_type, section, hashes, and rationale.
116
+ """
117
+ changes: list[dict[str, Any]] = []
118
+
119
+ for pattern in failure_data.get("patterns", []):
120
+ category = pattern["category"]
121
+ agent_types = pattern.get("agent_types", [])
122
+ count = pattern["count"]
123
+ representative = pattern.get("representative_error", "")
124
+
125
+ # If no agent types identified, apply to general
126
+ if not agent_types:
127
+ agent_types = ["general"]
128
+
129
+ for agent_type in agent_types:
130
+ # Determine which prompt section to modify based on failure category
131
+ section, rationale = self._section_for_category(
132
+ category, count, representative
133
+ )
134
+
135
+ old_content = f"{agent_type}:{section}:current"
136
+ new_content = f"{agent_type}:{section}:optimized:{category}:{count}"
137
+
138
+ changes.append({
139
+ "agent_type": agent_type,
140
+ "section": section,
141
+ "old_hash": _content_hash(old_content),
142
+ "new_hash": _content_hash(new_content),
143
+ "rationale": rationale,
144
+ })
145
+
146
+ # Deduplicate by (agent_type, section)
147
+ seen: set[tuple[str, str]] = set()
148
+ deduped: list[dict[str, Any]] = []
149
+ for change in changes:
150
+ key = (change["agent_type"], change["section"])
151
+ if key not in seen:
152
+ seen.add(key)
153
+ deduped.append(change)
154
+
155
+ return deduped
156
+
157
+ def _section_for_category(
158
+ self, category: str, count: int, error_msg: str
159
+ ) -> tuple[str, str]:
160
+ """Map a failure category to the prompt section that should be improved.
161
+
162
+ Returns:
163
+ Tuple of (section_name, rationale).
164
+ """
165
+ if category == "timeout":
166
+ return (
167
+ "timeout_handling",
168
+ f"Agent timeouts detected {count} times. "
169
+ "Add explicit timeout boundaries and fallback instructions.",
170
+ )
171
+ elif category == "verification":
172
+ return (
173
+ "verification_instructions",
174
+ f"Verification failures detected {count} times. "
175
+ "Strengthen verification steps and add pre-check instructions.",
176
+ )
177
+ elif category == "retry":
178
+ return (
179
+ "retry_strategy",
180
+ f"Excessive retries detected {count} times. "
181
+ "Add early-exit conditions and reduce maximum retry attempts.",
182
+ )
183
+ else:
184
+ return (
185
+ "error_handling",
186
+ f"Errors detected {count} times: {error_msg[:200]}. "
187
+ "Add targeted error handling guidance.",
188
+ )
189
+
190
+ def optimize(
191
+ self, sessions: int = 10, dry_run: bool = False
192
+ ) -> dict[str, Any]:
193
+ """Run prompt optimization from failure analysis.
194
+
195
+ Args:
196
+ sessions: Number of recent sessions to analyze.
197
+ dry_run: If True, generate changes but do not persist them.
198
+
199
+ Returns:
200
+ The new version data with all changes.
201
+ """
202
+ # Extract failures
203
+ failure_data = self._extractor.extract(sessions=sessions)
204
+
205
+ if failure_data["total_failures"] == 0:
206
+ return {
207
+ "version": self.get_current_version()["version"],
208
+ "generated_at": _timestamp_iso(),
209
+ "based_on_sessions": failure_data["session_count"],
210
+ "failures_analyzed": 0,
211
+ "changes": [],
212
+ "dry_run": dry_run,
213
+ }
214
+
215
+ # Generate changes
216
+ changes = self._generate_changes(failure_data)
217
+
218
+ with self._file_lock:
219
+ current_version = self.get_current_version()["version"]
220
+ new_version = current_version + 1
221
+
222
+ result = {
223
+ "version": new_version,
224
+ "generated_at": _timestamp_iso(),
225
+ "based_on_sessions": failure_data["session_count"],
226
+ "failures_analyzed": failure_data["total_failures"],
227
+ "changes": changes,
228
+ "dry_run": dry_run,
229
+ }
230
+
231
+ if not dry_run:
232
+ self._persist_version(result)
233
+
234
+ return result
235
+
236
+ def _persist_version(self, version_data: dict[str, Any]) -> None:
237
+ """Write version data to disk in a thread-safe manner.
238
+
239
+ Caller must already hold self._file_lock.
240
+ Uses atomic temp-file-then-rename for POSIX safety.
241
+ """
242
+ self._ensure_dirs()
243
+
244
+ # Remove dry_run flag from persisted data
245
+ persist_data = {k: v for k, v in version_data.items() if k != "dry_run"}
246
+
247
+ version_path = self._version_file(persist_data["version"])
248
+ latest_path = self._latest_file()
249
+ content = json.dumps(persist_data, indent=2, ensure_ascii=False)
250
+
251
+ try:
252
+ # Atomic write to version file
253
+ fd, tmp_path = tempfile.mkstemp(
254
+ dir=str(self._prompts_dir), suffix=".tmp"
255
+ )
256
+ try:
257
+ os.write(fd, content.encode("utf-8"))
258
+ os.fsync(fd)
259
+ finally:
260
+ os.close(fd)
261
+ os.rename(tmp_path, str(version_path))
262
+
263
+ # Atomic write to latest file
264
+ fd, tmp_path = tempfile.mkstemp(
265
+ dir=str(self._prompts_dir), suffix=".tmp"
266
+ )
267
+ try:
268
+ os.write(fd, content.encode("utf-8"))
269
+ os.fsync(fd)
270
+ finally:
271
+ os.close(fd)
272
+ os.rename(tmp_path, str(latest_path))
273
+
274
+ logger.info(
275
+ "Persisted prompt optimization v%d to %s",
276
+ persist_data["version"],
277
+ version_path,
278
+ )
279
+ except OSError as exc:
280
+ logger.error("Failed to persist prompt version: %s", exc)
281
+ raise
@@ -0,0 +1,331 @@
1
+ """
2
+ Rigour Quality Gate Integration for Loki Mode.
3
+
4
+ Shells out to `npx @rigour-labs/cli` to run quality scans, parses JSON output,
5
+ and maps findings to the Loki Mode quality gate format.
6
+ """
7
+
8
+ import json
9
+ import logging
10
+ import os
11
+ import shutil
12
+ import subprocess
13
+ import threading
14
+ from datetime import datetime, timezone
15
+ from pathlib import Path
16
+ from typing import Any, Optional
17
+
18
+ logger = logging.getLogger(__name__)  # module-level logger named after this module
19
+
20
+ # Compliance presets supported by Rigour CLI
21
+ VALID_PRESETS = ("default", "healthcare", "fintech", "government")  # scan() rejects any preset not listed here
22
+
23
+ # Severity levels in descending order of importance
24
+ SEVERITY_LEVELS = ("critical", "major", "minor", "info")  # check_blocking() slices this tuple from the front
25
+
26
+ # Grade thresholds
27
+ _GRADE_THRESHOLDS = [
28
+ (97, "A+"), (93, "A"), (90, "A-"),
29
+ (87, "B+"), (83, "B"), (80, "B-"),
30
+ (77, "C+"), (73, "C"), (70, "C-"),
31
+ (67, "D+"), (63, "D"), (60, "D-"),
32
+ (0, "F"),
33
+ ]
34
+
35
+
36
+ def _score_to_grade(score: float) -> str:
37
+ """Convert a numeric score (0-100) to a letter grade."""
38
+ for threshold, grade in _GRADE_THRESHOLDS:
39
+ if score >= threshold:
40
+ return grade
41
+ return "F"
42
+
43
+
44
class RigourIntegration:
    """Integration with Rigour Labs CLI for quality scanning.

    Scan results are mapped to the Loki Mode quality gate format, cached in
    memory, and appended to ``<data_dir>/quality/scores.jsonl``.
    """

    # Upper bound, in seconds, for a single CLI invocation.
    _SCAN_TIMEOUT_SECONDS = 300

    def __init__(self, data_dir: Optional[str] = None) -> None:
        """Set up storage paths and the in-memory score cache.

        Args:
            data_dir: Base data directory. Falls back to the ``LOKI_DATA_DIR``
                environment variable, then to ``~/.loki``.
        """
        self._data_dir = Path(
            data_dir or os.environ.get("LOKI_DATA_DIR", os.path.expanduser("~/.loki"))
        )
        self._scores_dir = self._data_dir / "quality"
        self._scores_file = self._scores_dir / "scores.jsonl"
        # Guards _last_score and appends to the scores file.
        self._lock = threading.Lock()
        self._last_score: Optional[dict[str, Any]] = None

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    @property
    def available(self) -> bool:
        """Check if the Rigour CLI is available via npx.

        Only the presence of ``npx`` on PATH is checked.
        """
        return shutil.which("npx") is not None

    def scan(self, path: str = ".", preset: str = "default") -> dict[str, Any]:
        """Run a Rigour quality scan and return parsed results.

        Every failure mode (invalid preset, npx missing, timeout, non-zero
        exit without output, unparsable output) yields an "empty" result with
        ``available`` False and a ``reason``, instead of raising.

        Args:
            path: Directory or file to scan.
            preset: Compliance preset (default, healthcare, fintech, government).

        Returns:
            Parsed scan results in Loki Mode quality gate format.
        """
        if preset not in VALID_PRESETS:
            message = f"Invalid preset: {preset}. Must be one of {VALID_PRESETS}"
            # Same shape as every other failure, plus the original "error" key.
            invalid = self._empty_result(preset, reason=message)
            invalid["error"] = message
            return invalid

        if not self.available:
            logger.warning("Rigour CLI not available (npx not found). Returning empty results.")
            return self._empty_result(preset, reason="npx not found")

        try:
            result = subprocess.run(
                [
                    "npx", "@rigour-labs/cli", "scan",
                    "--format", "json",
                    "--preset", preset,
                    # "--" stops option parsing so a path starting with "-"
                    # is not taken as a flag.
                    "--", path,
                ],
                capture_output=True,
                text=True,
                timeout=self._SCAN_TIMEOUT_SECONDS,
            )
        except FileNotFoundError:
            logger.warning("npx executable not found at runtime.")
            return self._empty_result(preset, reason="npx not found")
        except subprocess.TimeoutExpired:
            logger.error("Rigour scan timed out after %ds.", self._SCAN_TIMEOUT_SECONDS)
            return self._empty_result(preset, reason="scan timed out")
        except OSError as exc:
            # e.g. PermissionError when npx exists but cannot be executed.
            logger.error("Rigour scan could not be started: %s", exc)
            return self._empty_result(preset, reason=f"scan failed: {str(exc)[:200]}")

        stdout = result.stdout or ""
        stderr = (result.stderr or "").strip()

        # A non-zero exit code with output is still parsed below.
        if result.returncode != 0 and not stdout.strip():
            logger.error("Rigour scan failed (rc=%d): %s", result.returncode, stderr)
            return self._empty_result(preset, reason=f"scan failed: {stderr[:200]}")

        try:
            raw = json.loads(stdout)
        except json.JSONDecodeError as exc:
            logger.error("Failed to parse Rigour JSON output: %s", exc)
            return self._empty_result(preset, reason="invalid JSON output")

        if not isinstance(raw, dict):
            logger.error(
                "Rigour JSON output is not an object (got %s).", type(raw).__name__
            )
            return self._empty_result(preset, reason="invalid JSON output")

        score_data = self._map_to_loki_format(raw, preset)
        self._persist_score(score_data)
        return score_data

    def get_score(self) -> dict[str, Any]:
        """Return the most recent quality score with breakdown.

        If no scan has been run yet, returns the last persisted score or empty.
        Corrupt or non-object lines at the end of the history file are
        skipped in favour of the newest valid entry.
        """
        with self._lock:
            if self._last_score is not None:
                return self._last_score

        # Walk the history backwards and stop at the first usable entry.
        for line in reversed(self._read_score_lines()):
            entry = self._parse_score_line(line)
            if entry is not None:
                with self._lock:
                    self._last_score = entry
                return entry

        return {
            "available": self.available,
            "score": None,
            "message": "No scan results available. Run a scan first.",
        }

    def get_score_history(self, limit: int = 50) -> list[dict[str, Any]]:
        """Return score trend over time, most recent first.

        Lines that are not valid JSON objects are skipped.

        Args:
            limit: Maximum number of entries to return; values below zero
                are treated as zero.
        """
        entries = [
            entry
            for entry in map(self._parse_score_line, self._read_score_lines())
            if entry is not None
        ]

        # Most recent first, limited
        entries.reverse()
        return entries[:max(limit, 0)]

    def check_blocking(self, severity: str = "critical") -> bool:
        """Return True if there are blocking issues at or above the given severity.

        Args:
            severity: Minimum severity to consider blocking. An unknown value
                falls back to "critical" only.
        """
        score = self.get_score()
        if not score or score.get("score") is None:
            return False

        findings = score.get("findings", {})
        if not isinstance(findings, dict):
            return False

        try:
            idx = SEVERITY_LEVELS.index(severity.lower())
        except ValueError:
            idx = 0  # Default to critical only

        # SEVERITY_LEVELS is ordered most severe first, so a prefix is
        # "this level and everything more severe".
        for level in SEVERITY_LEVELS[:idx + 1]:
            count = findings.get(level, 0)
            # Ignore non-numeric counts read from persisted data.
            if isinstance(count, (int, float)) and count > 0:
                return True
        return False

    def export_report(self, fmt: str = "json") -> str:
        """Generate an audit report from the latest scan.

        Args:
            fmt: Output format. 'json' produces JSON; any other value
                produces the plain-text fallback.

        Returns:
            Formatted report string.
        """
        score = self.get_score()
        report = {
            "report_type": "quality_audit",
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "format": fmt,
            "available": score.get("available", self.available),
            "current_score": score if score.get("score") is not None else None,
            "history_summary": self._summarize_history(),
            "blocking_issues": self.check_blocking("critical"),
        }

        if fmt == "json":
            return json.dumps(report, indent=2)

        # Plain text fallback
        lines = [
            "Loki Mode Quality Audit Report",
            f"Generated: {report['generated_at']}",
            f"Available: {report['available']}",
            "",
        ]
        current = report["current_score"]
        if current:
            lines.append(f"Score: {current.get('score', 'N/A')} / {current.get('max_score', 100)} ({current.get('grade', 'N/A')})")
            lines.append(f"Preset: {current.get('preset', 'N/A')}")
            findings = current.get("findings", {})
            lines.append(f"Findings: critical={findings.get('critical', 0)} major={findings.get('major', 0)} minor={findings.get('minor', 0)} info={findings.get('info', 0)}")
        else:
            lines.append("No scan data available.")
        return "\n".join(lines)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _read_score_lines(self) -> list[str]:
        """Return the non-blank lines of the scores file, oldest first.

        A missing file yields an empty list; read errors are logged and also
        yield an empty list.
        """
        try:
            if self._scores_file.exists():
                text = self._scores_file.read_text(encoding="utf-8")
                return [line for line in text.splitlines() if line.strip()]
        except (OSError, UnicodeDecodeError) as exc:
            logger.warning("Could not read score history: %s", exc)
        return []

    @staticmethod
    def _parse_score_line(line: str) -> Optional[dict[str, Any]]:
        """Parse one history line; return None unless it is a JSON object."""
        try:
            entry = json.loads(line)
        except ValueError:
            return None
        return entry if isinstance(entry, dict) else None

    def _map_to_loki_format(self, raw: dict[str, Any], preset: str) -> dict[str, Any]:
        """Map raw Rigour CLI output to Loki Mode quality gate format.

        Besides building the result, this stores it as the cached latest score.

        Args:
            raw: Parsed CLI output. Findings are read from "findings" or
                "issues", categories from "categories" or "scores", and the
                overall score from "score" or "overall_score".
            preset: Preset the scan was run with; copied into the result.

        Returns:
            Result dict with score, grade, timestamp, findings and categories.
        """
        # Extract findings by severity
        raw_findings = raw.get("findings", raw.get("issues", []))
        findings: dict[str, int] = {"critical": 0, "major": 0, "minor": 0, "info": 0}
        if isinstance(raw_findings, list):
            for item in raw_findings:
                # Unknown severities and malformed entries are counted as info.
                sev = str(item.get("severity", "info")).lower() if isinstance(item, dict) else "info"
                findings[sev if sev in findings else "info"] += 1
        elif isinstance(raw_findings, dict):
            for sev in findings:
                try:
                    findings[sev] = int(_extract_num(raw_findings, sev, 0))
                except (ValueError, OverflowError):
                    # int() rejects NaN and infinities.
                    findings[sev] = 0

        # Extract category scores
        raw_categories = raw.get("categories", raw.get("scores", {}))
        if not isinstance(raw_categories, dict):
            raw_categories = {}
        categories = {
            "security": _extract_num(raw_categories, "security", 0),
            # snake_case wins; the camelCase spelling is the fallback.
            "code_quality": _extract_num(raw_categories, "code_quality", _extract_num(raw_categories, "codeQuality", 0)),
            "compliance": _extract_num(raw_categories, "compliance", 0),
            "best_practices": _extract_num(raw_categories, "best_practices", _extract_num(raw_categories, "bestPractices", 0)),
        }

        # Compute overall score
        raw_score = raw.get("score", raw.get("overall_score"))
        if raw_score is not None:
            try:
                score = float(raw_score)
            except (ValueError, TypeError):
                score = 0.0
        else:
            # Average of non-zero categories
            non_zero = [v for v in categories.values() if v > 0]
            score = sum(non_zero) / len(non_zero) if non_zero else 0.0

        # Round once and grade the rounded value so the reported score and
        # grade always agree (e.g. 96.96 is reported as 97.0 / "A+").
        score = round(score, 1)

        result: dict[str, Any] = {
            "available": True,
            "score": score,
            "max_score": 100,
            "grade": _score_to_grade(score),
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "preset": preset,
            "findings": findings,
            "categories": categories,
        }

        with self._lock:
            self._last_score = result

        return result

    def _persist_score(self, score_data: dict[str, Any]) -> None:
        """Append score to the JSONL history file (thread-safe).

        Write failures are logged and not raised.
        """
        with self._lock:
            try:
                self._scores_dir.mkdir(parents=True, exist_ok=True)
                with open(self._scores_file, "a", encoding="utf-8") as f:
                    f.write(json.dumps(score_data) + "\n")
            except OSError as exc:
                logger.error("Failed to persist score: %s", exc)

    def _empty_result(self, preset: str, reason: str = "") -> dict[str, Any]:
        """Return an empty result when scanning is not possible.

        Args:
            preset: Preset that was requested; copied into the result.
            reason: Short explanation of why no scan data is available.
        """
        return {
            "available": False,
            "score": None,
            "max_score": 100,
            "grade": None,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "preset": preset,
            "findings": {"critical": 0, "major": 0, "minor": 0, "info": 0},
            "categories": {"security": 0, "code_quality": 0, "compliance": 0, "best_practices": 0},
            "reason": reason,
        }

    def _summarize_history(self) -> dict[str, Any]:
        """Produce a brief summary of score history.

        Looks at up to the 100 most recent entries. Entries whose score is
        missing or not a number count toward "entries" but not the statistics.
        """
        history = self.get_score_history(limit=100)
        if not history:
            return {"entries": 0}

        scores = [
            h["score"]
            for h in history
            if isinstance(h.get("score"), (int, float)) and not isinstance(h.get("score"), bool)
        ]
        if not scores:
            return {"entries": len(history)}

        return {
            "entries": len(history),
            "latest": scores[0],  # history is newest first
            "min": min(scores),
            "max": max(scores),
            "avg": round(sum(scores) / len(scores), 1),
        }
+
325
+
326
+ def _extract_num(d: dict, key: str, default: float = 0) -> float:
327
+ """Safely extract a numeric value from a dict."""
328
+ try:
329
+ return float(d.get(key, default))
330
+ except (ValueError, TypeError):
331
+ return default