luckyd-code 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. luckyd_code/__init__.py +54 -0
  2. luckyd_code/__main__.py +5 -0
  3. luckyd_code/_agent_loop.py +551 -0
  4. luckyd_code/_data_dir.py +73 -0
  5. luckyd_code/agent.py +38 -0
  6. luckyd_code/analytics/__init__.py +18 -0
  7. luckyd_code/analytics/reporter.py +195 -0
  8. luckyd_code/analytics/scanner.py +443 -0
  9. luckyd_code/analytics/smells.py +316 -0
  10. luckyd_code/analytics/trends.py +303 -0
  11. luckyd_code/api.py +473 -0
  12. luckyd_code/audit_daemon.py +845 -0
  13. luckyd_code/autonomous_fixer.py +473 -0
  14. luckyd_code/background.py +159 -0
  15. luckyd_code/backup.py +237 -0
  16. luckyd_code/brain/__init__.py +84 -0
  17. luckyd_code/brain/assembler.py +100 -0
  18. luckyd_code/brain/chunker.py +345 -0
  19. luckyd_code/brain/constants.py +73 -0
  20. luckyd_code/brain/embedder.py +163 -0
  21. luckyd_code/brain/graph.py +311 -0
  22. luckyd_code/brain/indexer.py +316 -0
  23. luckyd_code/brain/parser.py +140 -0
  24. luckyd_code/brain/retriever.py +234 -0
  25. luckyd_code/cli.py +894 -0
  26. luckyd_code/cli_commands/__init__.py +1 -0
  27. luckyd_code/cli_commands/audit.py +120 -0
  28. luckyd_code/cli_commands/background.py +83 -0
  29. luckyd_code/cli_commands/brain.py +87 -0
  30. luckyd_code/cli_commands/config.py +75 -0
  31. luckyd_code/cli_commands/dispatcher.py +695 -0
  32. luckyd_code/cli_commands/sessions.py +41 -0
  33. luckyd_code/cli_entry.py +147 -0
  34. luckyd_code/cli_utils.py +112 -0
  35. luckyd_code/config.py +205 -0
  36. luckyd_code/context.py +214 -0
  37. luckyd_code/cost_tracker.py +209 -0
  38. luckyd_code/error_reporter.py +508 -0
  39. luckyd_code/exceptions.py +39 -0
  40. luckyd_code/export.py +126 -0
  41. luckyd_code/feedback_analyzer.py +290 -0
  42. luckyd_code/file_watcher.py +258 -0
  43. luckyd_code/git/__init__.py +11 -0
  44. luckyd_code/git/auto_commit.py +157 -0
  45. luckyd_code/git/tools.py +85 -0
  46. luckyd_code/hooks.py +236 -0
  47. luckyd_code/indexer.py +280 -0
  48. luckyd_code/init.py +39 -0
  49. luckyd_code/keybindings.py +77 -0
  50. luckyd_code/log.py +55 -0
  51. luckyd_code/mcp/__init__.py +6 -0
  52. luckyd_code/mcp/client.py +184 -0
  53. luckyd_code/memory/__init__.py +19 -0
  54. luckyd_code/memory/manager.py +339 -0
  55. luckyd_code/metrics/__init__.py +5 -0
  56. luckyd_code/model_registry.py +131 -0
  57. luckyd_code/orchestrator.py +204 -0
  58. luckyd_code/permissions/__init__.py +1 -0
  59. luckyd_code/permissions/manager.py +103 -0
  60. luckyd_code/planner.py +361 -0
  61. luckyd_code/plugins.py +91 -0
  62. luckyd_code/py.typed +0 -0
  63. luckyd_code/retry.py +57 -0
  64. luckyd_code/router.py +417 -0
  65. luckyd_code/sandbox.py +156 -0
  66. luckyd_code/self_critique.py +2 -0
  67. luckyd_code/self_improve.py +274 -0
  68. luckyd_code/sessions.py +114 -0
  69. luckyd_code/settings.py +72 -0
  70. luckyd_code/skills/__init__.py +8 -0
  71. luckyd_code/skills/review.py +22 -0
  72. luckyd_code/skills/security.py +17 -0
  73. luckyd_code/tasks/__init__.py +1 -0
  74. luckyd_code/tasks/manager.py +102 -0
  75. luckyd_code/templates/icon-192.png +0 -0
  76. luckyd_code/templates/icon-512.png +0 -0
  77. luckyd_code/templates/index.html +1965 -0
  78. luckyd_code/templates/manifest.json +14 -0
  79. luckyd_code/templates/src/app.js +694 -0
  80. luckyd_code/templates/src/body.html +767 -0
  81. luckyd_code/templates/src/cdn.txt +2 -0
  82. luckyd_code/templates/src/style.css +474 -0
  83. luckyd_code/templates/sw.js +31 -0
  84. luckyd_code/templates/test.html +6 -0
  85. luckyd_code/themes.py +48 -0
  86. luckyd_code/tools/__init__.py +97 -0
  87. luckyd_code/tools/agent_tools.py +65 -0
  88. luckyd_code/tools/bash.py +360 -0
  89. luckyd_code/tools/brain_tools.py +137 -0
  90. luckyd_code/tools/browser.py +369 -0
  91. luckyd_code/tools/datetime_tool.py +34 -0
  92. luckyd_code/tools/dockerfile_gen.py +212 -0
  93. luckyd_code/tools/file_ops.py +381 -0
  94. luckyd_code/tools/game_gen.py +360 -0
  95. luckyd_code/tools/git_tools.py +130 -0
  96. luckyd_code/tools/git_worktree.py +63 -0
  97. luckyd_code/tools/path_validate.py +64 -0
  98. luckyd_code/tools/project_gen.py +187 -0
  99. luckyd_code/tools/readme_gen.py +227 -0
  100. luckyd_code/tools/registry.py +157 -0
  101. luckyd_code/tools/shell_detect.py +109 -0
  102. luckyd_code/tools/web.py +89 -0
  103. luckyd_code/tools/youtube.py +187 -0
  104. luckyd_code/tools_bridge.py +144 -0
  105. luckyd_code/undo.py +126 -0
  106. luckyd_code/update.py +60 -0
  107. luckyd_code/verify.py +360 -0
  108. luckyd_code/web_app.py +176 -0
  109. luckyd_code/web_routes/__init__.py +23 -0
  110. luckyd_code/web_routes/background.py +73 -0
  111. luckyd_code/web_routes/brain.py +109 -0
  112. luckyd_code/web_routes/cost.py +12 -0
  113. luckyd_code/web_routes/files.py +133 -0
  114. luckyd_code/web_routes/memories.py +94 -0
  115. luckyd_code/web_routes/misc.py +67 -0
  116. luckyd_code/web_routes/project.py +48 -0
  117. luckyd_code/web_routes/review.py +20 -0
  118. luckyd_code/web_routes/sessions.py +44 -0
  119. luckyd_code/web_routes/settings.py +43 -0
  120. luckyd_code/web_routes/static.py +70 -0
  121. luckyd_code/web_routes/update.py +19 -0
  122. luckyd_code/web_routes/ws.py +237 -0
  123. luckyd_code-1.2.2.dist-info/METADATA +297 -0
  124. luckyd_code-1.2.2.dist-info/RECORD +127 -0
  125. luckyd_code-1.2.2.dist-info/WHEEL +4 -0
  126. luckyd_code-1.2.2.dist-info/entry_points.txt +3 -0
  127. luckyd_code-1.2.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,845 @@
1
+ """Continuous self-improvement daemon for DeepSeek Code.
2
+
3
+ Runs an audit loop at a configurable interval, collecting metrics,
4
+ detecting regressions, and applying targeted improvements using the
5
+ project's existing self_improve, verify, and agent loop
6
+ infrastructure.
7
+
8
+ Usage (programmatic)::
9
+
10
+ daemon = AuditDaemon(config, project_root="/path/to/project")
11
+ asyncio.run(daemon.run_forever())
12
+
13
+ Usage (CLI)::
14
+
15
+ deepseek-code --daemon
16
+ deepseek-code audit run
17
+ deepseek-code audit status
18
+ deepseek-code audit metrics
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import asyncio
24
+ import ctypes
25
+ import datetime
26
+ import json
27
+ import logging
28
+ import os
29
+ import re
30
+ import subprocess
31
+ import sys
32
+ from pathlib import Path
33
+ from typing import Any, Optional
34
+
35
+ _log = logging.getLogger(__name__)
36
+
37
+
38
+ def _pid_is_running(pid: int) -> bool:
39
+ """Return True if *pid* refers to a currently running process.
40
+
41
+ Uses a Windows-safe approach (ctypes OpenProcess) on win32 and the
42
+ standard POSIX signal-0 trick elsewhere.
43
+ """
44
+ if sys.platform == "win32":
45
+ SYNCHRONIZE = 0x00100000
46
+ handle = ctypes.windll.kernel32.OpenProcess(SYNCHRONIZE, False, pid)
47
+ if handle:
48
+ ctypes.windll.kernel32.CloseHandle(handle)
49
+ return True
50
+ return False
51
+ else:
52
+ try:
53
+ os.kill(pid, 0)
54
+ return True
55
+ except (OSError, ProcessLookupError):
56
+ return False
57
+
58
# Environment variable that overrides interval
_INTERVAL_ENV = "AUDIT_INTERVAL_MINUTES"
# Default audit interval in minutes, used when neither the constructor
# argument nor the environment variable is provided.
_DEFAULT_INTERVAL = 60

# Sentinel file — touching this pauses the daemon without killing the process
_PAUSE_MARKER = ".audit_daemon_paused"

# PID lock file — prevents two daemon processes from colliding
_LOCK_FILE = ".audit_lock"

# File cooldown in seconds (24 hours)
# A file that was just auto-improved is left alone for this long so the
# daemon does not repeatedly churn the same module.
_FILE_COOLDOWN_SECS = 86_400

# Smell kind → improvement area mapping — determines which smells are auto-fixable
# The area value is passed to self_improve.get_improvement_prompt() to select
# the prompt template for the agent run (see _attempt_improvement).
_FIXABLE_SMELL_KINDS: dict[str, str] = {
    # Trivial — mechanical fixes
    "syntax_error": "tools",
    "bare_except": "tools",
    "mutable_default": "tools",
    # Structural — focused refactoring
    "long_function": "refactor",
    "deep_nesting": "refactor",
    "too_many_params": "refactor",
    "large_class": "refactor",
    "high_complexity": "refactor",
    "large_file": "refactor",
    # Lightweight cleanup
    "high_todo_density": "cleanup",
    "empty_file": "cleanup",
    "large_file_bytes": "cleanup",
}

# Metric names — single source of truth
# NOTE: audit() slices the trailing len(METRIC_NAMES) rows of the metric
# history to reconstruct the previous cycle's values, so this tuple must
# stay in sync with the keys produced by _collect_metrics().
METRIC_NAMES = (
    "test_pass_rate",
    "syntax_error_rate",
    "lint_issue_count",
    "todo_count",
)
97
+
98
+
99
+ # ------------------------------------------------------------------ #
100
+ # AuditDaemon
101
+ # ------------------------------------------------------------------ #
102
+
103
+ class AuditDaemon:
104
+ """Background daemon that continuously audits and improves the project."""
105
+
106
+ def __init__(
107
+ self,
108
+ config,
109
+ project_root: str,
110
+ interval_minutes: Optional[int] = None,
111
+ ):
112
+ self.config = config
113
+ self.project_root = Path(project_root).resolve()
114
+ self.interval_minutes: int = (
115
+ interval_minutes
116
+ or int(os.environ.get(_INTERVAL_ENV, _DEFAULT_INTERVAL))
117
+ )
118
+
119
+ # Metrics storage
120
+ self._metrics_dir = self.project_root / "luckyd_code" / "metrics"
121
+ self._metrics_dir.mkdir(parents=True, exist_ok=True)
122
+ self._ts_file = self._metrics_dir / "time_series.jsonl"
123
+
124
+ # Audit log
125
+ self._log_file = self.project_root / "luckyd_code" / "audit.log"
126
+
127
+ # In-memory state
128
+ self.last_audit_time: Optional[datetime.datetime] = None
129
+ self.improvement_count: int = 0
130
+
131
+ # Per-file cooldown: maps relative path -> last improvement timestamp
132
+ self._file_last_improved: dict[str, float] = {}
133
+
134
+ # Per-file backoff after failed attempts: maps rel path -> fail count
135
+ self._file_fail_count: dict[str, int] = {}
136
+
137
+ # Configure file handler for audit.log
138
+ fh = logging.FileHandler(str(self._log_file), encoding="utf-8")
139
+ fh.setFormatter(logging.Formatter("%(asctime)s %(levelname)-8s %(message)s"))
140
+ _log.addHandler(fh)
141
+ _log.setLevel(logging.INFO)
142
+
143
+ # ------------------------------------------------------------------ #
144
+ # Public entry points
145
+ # ------------------------------------------------------------------ #
146
+
147
    async def run_forever(self) -> None:
        """Async loop: acquire lock -> audit -> sleep -> repeat.

        Exits immediately if another daemon process already holds the lock.
        Releases the lock on exit (normal or exception).
        """
        if not self._acquire_lock():
            _log.error(
                "Another audit daemon is already running. "
                "Check %s/%s. Exiting.",
                self.project_root, _LOCK_FILE,
            )
            return
        try:
            _log.info(
                "Audit daemon starting — interval=%dm, project=%s",
                self.interval_minutes, self.project_root,
            )
            while True:
                # A crash inside audit() must never kill the daemon:
                # log it and keep looping.
                try:
                    self.audit()
                except Exception as exc:
                    _log.exception("Unhandled error in audit(): %s", exc)
                # The interval is re-read every cycle, so changing
                # self.interval_minutes at runtime takes effect without
                # a restart.
                await asyncio.sleep(self.interval_minutes * 60)
        finally:
            self._release_lock()
173
+
174
    def audit(self) -> dict:
        """Run one full audit cycle. Returns a summary dict.

        Checks the PID lock so that a standalone CLI invocation (``audit run``)
        also refuses to run if a daemon process is already holding the lock.
        When called from within ``run_forever()`` (same PID), the check is a
        no-op because ``_acquire_lock`` recognises our own PID.

        Returns:
            Summary dict with keys ``timestamp``, ``skipped``, ``skip_reason``,
            ``metrics``, ``improvements_attempted``, ``improvements_committed``
            and ``regressions``.
        """
        summary: dict = {
            "timestamp": datetime.datetime.now().isoformat(timespec="seconds"),
            "skipped": False,
            "skip_reason": "",
            "metrics": {},
            "improvements_attempted": 0,
            "improvements_committed": 0,
            "regressions": [],
        }

        # --- Guard: missing API key
        if not self.config.api_key:
            msg = "DEEPSEEK_API_KEY not set — skipping audit"
            _log.warning(msg)
            summary.update(skipped=True, skip_reason=msg)
            return summary

        # --- Guard: paused
        # NOTE(review): _is_paused() is defined outside this chunk;
        # presumably it checks for the _PAUSE_MARKER file — confirm.
        if self._is_paused():
            msg = "Daemon paused (remove .audit_daemon_paused to resume)"
            _log.info(msg)
            summary.update(skipped=True, skip_reason=msg)
            return summary

        # --- Guard: lock check (standalone invocation — run_forever already holds it)
        lock_path = self.project_root / _LOCK_FILE
        if lock_path.exists():
            try:
                pid = int(lock_path.read_text().strip())
                if pid != os.getpid() and _pid_is_running(pid):
                    msg = f"Another daemon is running (PID {pid}) — skipping"
                    _log.info(msg)
                    summary.update(skipped=True, skip_reason=msg)
                    return summary
            except (ValueError, OSError):
                pass
            # Reaching here means the lock is ours, stale, or unreadable —
            # clear it so the cycle can proceed.
            lock_path.unlink(missing_ok=True)

        # --- Guard: dirty working tree
        if not self._is_tree_clean():
            msg = "Working tree is dirty — skipping audit to avoid conflicts"
            _log.info(msg)
            summary.update(skipped=True, skip_reason=msg)
            return summary

        _log.info("=== Audit cycle starting ===")

        # Step 1: Collect metrics
        metrics = self._collect_metrics()
        summary["metrics"] = metrics
        _log.info("Metrics: %s", {k: round(v, 4) for k, v in metrics.items()})

        # Step 2: Load historical metrics and detect regressions
        history = self._load_metric_history()
        is_first_run = not history
        regressions = [] if is_first_run else self._detect_regressions(metrics, history)
        summary["regressions"] = regressions
        if regressions:
            _log.warning("Regressions detected: %s", regressions)

        # Step 3: First run — just collect baseline, skip improvements
        if is_first_run:
            _log.info("First run — collecting baseline metrics, no improvements attempted")
            self._append_metrics(metrics, deltas={})
            self.last_audit_time = datetime.datetime.now()
            return summary

        # Step 4: Decide whether improvements are needed
        # NOTE(review): todo_count deliberately does not trigger a cycle —
        # presumably it is informational only; confirm.
        needs_improvement = (
            metrics.get("test_pass_rate", 1.0) < 1.0
            or metrics.get("syntax_error_rate", 0.0) > 0.0
            or metrics.get("lint_issue_count", 0) > 0
            or bool(regressions)
        )

        if not needs_improvement:
            _log.info("All metrics healthy — no improvements needed this cycle")
            # The last len(METRIC_NAMES) history rows are the previous
            # cycle's values (one row is appended per metric per cycle).
            prev_metrics = {m["metric"]: m["value"] for m in history[-len(METRIC_NAMES):]}
            deltas = {k: metrics[k] - prev_metrics.get(k, metrics[k]) for k in metrics}
            self._append_metrics(metrics, deltas)
            self.last_audit_time = datetime.datetime.now()
            return summary

        # Step 5: Find issues via analytics.smells (compose — do not reimplement)
        from .analytics.smells import detect_smells
        all_smells = detect_smells(str(self.project_root / "luckyd_code"))
        # Normalise smells into plain dicts, ordered by severity first so
        # errors are attempted before warnings/info.
        all_issues: list[dict[str, Any]] = sorted(
            [
                {
                    "file": s.file,
                    "line": s.line,
                    "kind": s.kind,
                    "detail": s.message,
                    "priority": {"error": 1, "warning": 2, "info": 3}.get(s.severity, 2),
                }
                for s in all_smells
            ],
            key=lambda i: (i["priority"], i["file"], i["line"]),
        )
        _log.info("Found %d source issues", len(all_issues))

        # Filter files on cooldown or high backoff
        now = datetime.datetime.now().timestamp()
        eligible_issues = [
            issue for issue in all_issues
            if (now - self._file_last_improved.get(issue["file"], 0.0)) >= _FILE_COOLDOWN_SECS
            and self._file_fail_count.get(issue["file"], 0) < 3
        ]

        if not eligible_issues:
            _log.info("No eligible issues (all files on cooldown or backed off)")
            self._append_metrics(metrics, deltas={})
            self.last_audit_time = datetime.datetime.now()
            return summary

        # Step 6: Filter out unfixable smells and attempt ONE improvement per cycle.
        # Processing multiple issues per cycle is unsafe: after the first commit
        # the second attempt's baseline snapshot is stale.
        fixable_issues = [
            i for i in eligible_issues
            if i["kind"] in _FIXABLE_SMELL_KINDS
        ]
        if not fixable_issues:
            _log.info(
                "No fixable issues — %d eligible but none mappable "
                "(available kinds: %s)", len(eligible_issues),
                sorted(_FIXABLE_SMELL_KINDS),
            )
            self._append_metrics(metrics, deltas={})
            self.last_audit_time = datetime.datetime.now()
            return summary

        target_issue = fixable_issues[0]
        # Stash the resolved improvement area on the issue so
        # _attempt_improvement does not need to re-derive it.
        target_issue["_area"] = _FIXABLE_SMELL_KINDS[target_issue["kind"]]
        _log.info(
            "Target issue: %s (kind=%s, area=%s, file=%s line=%d)",
            target_issue["detail"], target_issue["kind"],
            target_issue["_area"], target_issue["file"],
            target_issue["line"],
        )

        ok = self._attempt_improvement(target_issue, metrics)
        summary["improvements_attempted"] = 1
        rel = target_issue["file"]

        if ok:
            summary["improvements_committed"] = 1
            self.improvement_count += 1
            # Success: start the 24h cooldown and clear any failure backoff.
            self._file_last_improved[rel] = datetime.datetime.now().timestamp()
            self._file_fail_count.pop(rel, None)
        else:
            # Failure: count towards the 3-strikes backoff for this file.
            self._file_fail_count[rel] = self._file_fail_count.get(rel, 0) + 1

        # Step 7: Update metrics (re-measure only if something was committed)
        post_metrics = self._collect_metrics() if ok else metrics
        prev_metrics = {m["metric"]: m["value"] for m in history[-len(METRIC_NAMES):]}
        deltas = {
            k: post_metrics[k] - prev_metrics.get(k, post_metrics[k])
            for k in post_metrics
        }
        self._append_metrics(post_metrics, deltas)

        _log.info(
            "=== Audit cycle complete: %d committed, 1 attempted ===",
            summary["improvements_committed"],
        )
        self.last_audit_time = datetime.datetime.now()
        return summary
350
+
351
+ # ------------------------------------------------------------------ #
352
+ # Improvement orchestration
353
+ # ------------------------------------------------------------------ #
354
+
355
    def _attempt_improvement(self, issue: dict, baseline_metrics: dict) -> bool:
        """Try to fix one issue. Returns True if committed successfully.

        Pipeline: snapshot -> agent run -> per-file verification ->
        metric-regression check -> commit. Any failure rolls the working
        tree back to the snapshot and returns False.

        Args:
            issue: Normalised smell dict from audit() (keys ``file``,
                ``line``, ``kind``, ``detail``, and optionally ``_area``).
            baseline_metrics: Metrics measured before the agent ran; used
                to detect a regression caused by the change itself.
        """
        # Imported lazily to keep daemon start-up cheap and avoid cycles.
        from .self_improve import ImprovementTracker, get_improvement_prompt
        from .verify import run_verify_pipeline, pipeline_all_passed, pipeline_feedback
        from .context import ConversationContext
        from ._agent_loop import run_agent_loop, RunConfig
        from .tools import get_default_registry

        rel_file = issue["file"]
        kind = issue["kind"]

        _log.info("Attempting improvement: %s in %s", kind, rel_file)

        area = issue.get("_area") or _FIXABLE_SMELL_KINDS.get(kind)
        if area is None:
            _log.info(
                "Skipping smell '%s' — not mapped to an improvement area "
                "(only %s are auto-fixable)", kind, sorted(_FIXABLE_SMELL_KINDS),
            )
            return False

        # Build the agent task: area-level prompt plus the concrete issue.
        base_prompt = get_improvement_prompt(area)
        task = (
            f"{base_prompt}\n\n"
            f"**Specific issue to fix:** {issue['detail']}\n"
            f"**File:** {rel_file}\n"
            f"**Line:** {issue['line']}\n"
            f"**Kind:** {kind}\n\n"
            f"Fix only this specific issue. Do not make unrelated changes."
        )

        # Snapshot the tree so failed attempts can be rolled back.
        tracker = ImprovementTracker(cwd=str(self.project_root))
        snap_msg = tracker.snapshot()
        _log.debug("Tracker snapshot: %s", snap_msg)

        registry = get_default_registry()
        ctx = ConversationContext(
            system_prompt=self.config.system_prompt,
            max_messages=40,
            config=self.config,
            model=self.config.model,
        )
        ctx.add_user_message(task)

        run_cfg = RunConfig(
            label="self-improve",
            verify_edits=True,
            max_verify_retries=2,
            run_tests=False,
            project_root=str(self.project_root),
        )

        # More complex tasks get more turns
        max_turns = 12 if area == "refactor" else 8

        try:
            run_agent_loop(
                context=ctx,
                config=self.config,
                tools=registry.list_tools(),
                registry=registry,
                max_turns=max_turns,
                label="self-improve",
                run_config=run_cfg,
            )
        except Exception as exc:
            _log.error("Agent loop error: %s", exc)
            # Get whatever files the agent may have created before crashing
            # so _rollback can delete them too (they are untracked).
            try:
                crash_report = tracker.report(commit=False)
                agent_files = crash_report.files_changed
            except Exception:
                agent_files = []
            self._rollback(agent_new_files=agent_files)
            return False

        # Verify changed files (compose with verify module — do not reimplement)
        report = tracker.report(commit=False)
        changed = report.files_changed

        if not changed:
            _log.info("Agent made no file changes — nothing to commit")
            return False

        # Run syntax/lint/consistency checks on every changed Python file;
        # non-.py files (docs, configs) are not verified.
        all_ok = True
        for fp in changed:
            abs_fp = str(self.project_root / fp)
            if not abs_fp.endswith(".py"):
                continue
            results = run_verify_pipeline(
                file_path=abs_fp,
                project_root=str(self.project_root),
                run_lint=True,
                run_consistency=True,
                run_tests=False,
            )
            if not pipeline_all_passed(results):
                _log.warning(
                    "Verification failed for %s:\n%s", fp, pipeline_feedback(results)
                )
                all_ok = False

        if not all_ok:
            _log.warning("Verification failed — rolling back working tree")
            self._rollback(agent_new_files=changed)
            return False

        # Check metrics didn't regress
        post_metrics = self._collect_metrics()
        if self._is_regression(baseline_metrics, post_metrics):
            _log.warning("Metrics regressed after change — rolling back")
            self._rollback(agent_new_files=changed)
            return False

        # Commit
        commit_msg = (
            f"self-improve: fix {kind} in {rel_file} "
            f"[auto, cycle #{self.improvement_count + 1}]"
        )
        final_report = tracker.report(commit=True, commit_msg=commit_msg)
        _log.info(
            "Committed: %s — files: %s",
            final_report.commit_hash, final_report.files_changed,
        )

        self._log_to_changelog(
            description=f"Fix {kind}: {issue['detail']} in `{rel_file}`",
            files=final_report.files_changed,
            metrics_delta={
                k: post_metrics[k] - baseline_metrics.get(k, post_metrics[k])
                for k in post_metrics
            },
        )
        return True
489
+
490
+ # ------------------------------------------------------------------ #
491
+ # Metrics — compose with verify and analytics modules
492
+ # ------------------------------------------------------------------ #
493
+
494
    def _collect_metrics(self) -> dict[str, float]:
        """Measure current state of all tracked benchmarks.

        Composes with existing modules:
        - test_pass_rate: pytest via subprocess
        - syntax_error_rate: verify.verify_syntax() per file
        - lint_issue_count: verify.verify_lint() on the package directory
        - todo_count: analytics.smells.detect_smells()

        Returns:
            Mapping of metric name (see METRIC_NAMES) to its current value.
        """
        from .verify import verify_syntax, verify_lint
        from .analytics.smells import detect_smells

        metrics: dict[str, float] = {}

        # test_pass_rate — pytest subprocess (run_verify_pipeline is per-file only)
        pass_rate = self._run_pytest()
        # None means pytest couldn't run (timeout, missing, etc.) —
        # use the last known value from history so a transient infra
        # hiccup doesn't look like a regression.
        if pass_rate is None:
            history = self._load_metric_history()
            # Later rows overwrite earlier ones, so `prev` ends up holding
            # the most recent recorded value per metric.
            prev = {row["metric"]: row["value"] for row in history}
            pass_rate = prev.get("test_pass_rate", 1.0)
            _log.warning(
                "pytest unavailable — using last known test_pass_rate: %.4f", pass_rate
            )
        metrics["test_pass_rate"] = pass_rate

        # syntax_error_rate — verify_syntax per Python file
        pkg = self.project_root / "luckyd_code"
        py_files = [f for f in pkg.rglob("*.py") if "__pycache__" not in f.parts]
        total = len(py_files)
        if total:
            error_count = sum(1 for f in py_files if not verify_syntax(str(f)).passed)
            metrics["syntax_error_rate"] = round(error_count / total, 4)
        else:
            # No Python files at all — treat as "no syntax errors".
            metrics["syntax_error_rate"] = 0.0

        # lint_issue_count — verify_lint on the whole package dir (ruff/flake8 accept dirs)
        lint_result = verify_lint(str(pkg), str(self.project_root))
        if lint_result is not None and not lint_result.passed:
            # Heuristic: each non-empty output line counts as one issue;
            # a failed lint run always reports at least 1.
            issue_lines = [
                ln for ln in lint_result.raw_output.splitlines() if ln.strip()
            ]
            metrics["lint_issue_count"] = float(max(1, len(issue_lines)))
        else:
            metrics["lint_issue_count"] = 0.0

        # todo_count — detect_smells; count smells whose message contains TODO markers
        _TODO_MARKERS = ("TODO", "FIXME", "HACK", "XXX")
        smells = detect_smells(str(pkg))
        metrics["todo_count"] = float(
            sum(
                1 for s in smells
                if any(marker in s.message.upper() for marker in _TODO_MARKERS)
            )
        )

        return metrics
553
+
554
+ def _run_pytest(self) -> Optional[float]:
555
+ """Run pytest in quiet mode and return pass rate (0.0–1.0), or None on failure.
556
+
557
+ Returns None if pytest cannot run (timeout, missing, subprocess error)
558
+ so callers can distinguish "all tests passed" from "couldn't run tests".
559
+ """
560
+ timeout_secs = 180
561
+ try:
562
+ proc = subprocess.run(
563
+ [
564
+ sys.executable, "-m", "pytest",
565
+ "--tb=no", "-q", "--no-header",
566
+ "--ignore=.venv", "--ignore=.mypy_cache",
567
+ ],
568
+ capture_output=True,
569
+ text=True,
570
+ timeout=timeout_secs,
571
+ cwd=str(self.project_root),
572
+ )
573
+ combined = (proc.stdout + proc.stderr).strip()
574
+ passed = failed = 0
575
+ for line in reversed(combined.splitlines()):
576
+ m_pass = re.search(r"(\d+) passed", line)
577
+ m_fail = re.search(r"(\d+) failed", line)
578
+ m_error = re.search(r"(\d+) error", line)
579
+ if m_pass or m_fail or m_error:
580
+ passed = int(m_pass.group(1)) if m_pass else 0
581
+ failed = (int(m_fail.group(1)) if m_fail else 0) + (
582
+ int(m_error.group(1)) if m_error else 0
583
+ )
584
+ break
585
+ total = passed + failed
586
+ return round(passed / total, 4) if total > 0 else 1.0
587
+ except subprocess.TimeoutExpired:
588
+ _log.error("pytest timed out after %ds — tests may be hanging", timeout_secs)
589
+ return None
590
+ except Exception as e:
591
+ _log.error("Could not run pytest: %s", e)
592
+ return None
593
+
594
+ def _load_metric_history(self) -> list[dict]:
595
+ """Load all rows from time_series.jsonl."""
596
+ if not self._ts_file.exists():
597
+ return []
598
+ rows = []
599
+ for line in self._ts_file.read_text(encoding="utf-8").splitlines():
600
+ line = line.strip()
601
+ if not line:
602
+ continue
603
+ try:
604
+ rows.append(json.loads(line))
605
+ except json.JSONDecodeError:
606
+ pass
607
+ return rows
608
+
609
+ def _append_metrics(self, metrics: dict[str, float], deltas: dict[str, float]) -> None:
610
+ """Write current metrics to time_series.jsonl."""
611
+ ts = datetime.datetime.now().isoformat(timespec="seconds")
612
+ self._ts_file.parent.mkdir(parents=True, exist_ok=True)
613
+ with self._ts_file.open("a", encoding="utf-8") as fh:
614
+ for metric, value in metrics.items():
615
+ row = {
616
+ "timestamp": ts,
617
+ "metric": metric,
618
+ "value": value,
619
+ "delta": round(deltas.get(metric, 0.0), 6),
620
+ }
621
+ fh.write(json.dumps(row) + "\n")
622
+
623
+ def _detect_regressions(
624
+ self, current: dict[str, float], history: list[dict]
625
+ ) -> list[str]:
626
+ """Compare current metrics against history and flag regressions."""
627
+ regressions: list[str] = []
628
+ prev: dict[str, float] = {}
629
+ for row in history:
630
+ prev[row["metric"]] = row["value"]
631
+
632
+ # Higher is better
633
+ for metric in ("test_pass_rate",):
634
+ if metric in prev and metric in current:
635
+ if current[metric] < prev[metric] - 0.01:
636
+ regressions.append(
637
+ f"{metric}: {prev[metric]:.3f} -> {current[metric]:.3f}"
638
+ )
639
+
640
+ # Lower is better
641
+ for metric in ("syntax_error_rate", "lint_issue_count", "todo_count"):
642
+ if metric in prev and metric in current:
643
+ if current[metric] > prev[metric] + 1:
644
+ regressions.append(
645
+ f"{metric}: {prev[metric]:.1f} -> {current[metric]:.1f}"
646
+ )
647
+ return regressions
648
+
649
+ def _is_regression(
650
+ self, baseline: dict[str, float], post: dict[str, float]
651
+ ) -> bool:
652
+ """Return True if post-metrics are strictly worse than baseline."""
653
+ if post.get("test_pass_rate", 1.0) < baseline.get("test_pass_rate", 1.0) - 0.005:
654
+ return True
655
+ if post.get("syntax_error_rate", 0.0) > baseline.get("syntax_error_rate", 0.0) + 0.01:
656
+ return True
657
+ return False
658
+
659
+ # ------------------------------------------------------------------ #
660
+ # Git helpers
661
+ # ------------------------------------------------------------------ #
662
+
663
+ def _is_tree_clean(self) -> bool:
664
+ """Return True if the git working tree has no uncommitted changes.
665
+
666
+ Returns True (optimistic) on subprocess errors — a transient git
667
+ failure (e.g., Windows file locking) should not stall the daemon
668
+ for hours. The daemon's own snapshot/rollback guards provide a
669
+ second layer of safety.
670
+ """
671
+ try:
672
+ result = subprocess.run(
673
+ ["git", "status", "--porcelain"],
674
+ capture_output=True, text=True, timeout=15,
675
+ cwd=str(self.project_root),
676
+ )
677
+ if result.returncode != 0:
678
+ _log.warning(
679
+ "git status exited %d (assuming clean): %s",
680
+ result.returncode, result.stderr.strip()[:200],
681
+ )
682
+ return True
683
+ return result.stdout.strip() == ""
684
+ except Exception as exc:
685
+ _log.warning("git status check failed (assuming clean): %s", exc)
686
+ return True
687
+
688
+ def _rollback(self, agent_new_files: Optional[list] = None) -> None:
689
+ """Roll back changes made by the agent.
690
+
691
+ Restores tracked files via ``git checkout -- .`` and removes only
692
+ the untracked files that the agent created (passed in as
693
+ *agent_new_files*). Using ``git clean -fd`` was unsafe because it
694
+ deleted ALL untracked user files in the project, not just those
695
+ created by the improvement session.
696
+ """
697
+ try:
698
+ subprocess.run(
699
+ ["git", "checkout", "--", "."],
700
+ capture_output=True, text=True, timeout=15,
701
+ cwd=str(self.project_root),
702
+ check=False,
703
+ )
704
+ _log.info("Rolled back tracked file changes to HEAD")
705
+ except Exception as exc:
706
+ _log.error("git checkout rollback failed: %s", exc)
707
+
708
+ # Remove only files the agent created (untracked before this session)
709
+ if agent_new_files:
710
+ for rel_path in agent_new_files:
711
+ abs_path = self.project_root / rel_path
712
+ try:
713
+ if abs_path.exists() and abs_path.is_file():
714
+ abs_path.unlink()
715
+ _log.info("Removed agent-created file: %s", rel_path)
716
+ except Exception as exc:
717
+ _log.warning("Could not remove %s: %s", rel_path, exc)
718
+
719
+ # ------------------------------------------------------------------ #
720
+ # Locking
721
+ # ------------------------------------------------------------------ #
722
+
723
def _acquire_lock(self) -> bool:
    """Write a PID lock file.

    Returns True if the lock was acquired (or we already hold it).
    Returns False if another live process holds the lock.
    Removes stale lock files automatically.

    The lock file is created with ``O_CREAT | O_EXCL`` so that two
    processes racing past the staleness check cannot both believe they
    acquired the lock (the previous ``write_text`` implementation
    silently overwrote a lock written between the check and the write).
    """
    lock_path = self.project_root / _LOCK_FILE
    if lock_path.exists():
        try:
            pid = int(lock_path.read_text().strip())
            if pid == os.getpid():
                return True  # we already hold it (re-entrant call)
            if _pid_is_running(pid):
                return False
            # Process is gone — stale lock, remove and proceed
            lock_path.unlink(missing_ok=True)
        except (ValueError, OSError):
            lock_path.unlink(missing_ok=True)
    try:
        # Atomic create: O_EXCL makes this fail if another process
        # recreated the lock file since the check above.
        fd = os.open(str(lock_path), os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        try:
            os.write(fd, str(os.getpid()).encode("utf-8"))
        finally:
            os.close(fd)
        return True
    except FileExistsError:
        # Lost the race — another process grabbed the lock first.
        return False
    except OSError as exc:
        _log.error("Could not write lock file %s: %s", lock_path, exc)
        return False
748
+
749
def _release_lock(self) -> None:
    """Remove the PID lock file if we own it."""
    lock_path = self.project_root / _LOCK_FILE
    try:
        # Read the recorded owner; a missing file means nothing to do.
        owner = lock_path.read_text(encoding="utf-8").strip() if lock_path.exists() else None
        if owner == str(os.getpid()):
            lock_path.unlink(missing_ok=True)
    except OSError:
        # Best-effort cleanup: never let lock release raise.
        pass
759
+
760
+ # ------------------------------------------------------------------ #
761
+ # Changelog
762
+ # ------------------------------------------------------------------ #
763
+
764
+ def _log_to_changelog(
765
+ self,
766
+ description: str,
767
+ files: list[str],
768
+ metrics_delta: dict[str, float],
769
+ ) -> None:
770
+ """Append an entry to CHANGELOG.md."""
771
+ changelog = self.project_root / "CHANGELOG.md"
772
+ ts = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
773
+ delta_str = ", ".join(
774
+ f"{k}: {'+' if v >= 0 else ''}{v:.4f}"
775
+ for k, v in metrics_delta.items()
776
+ if abs(v) > 0.0001
777
+ )
778
+ entry = (
779
+ f"\n### [{ts}] Self-improvement #{self.improvement_count}\n\n"
780
+ f"**Change:** {description}\n\n"
781
+ f"**Files:** {', '.join(f'`{f}`' for f in files)}\n\n"
782
+ )
783
+ if delta_str:
784
+ entry += f"**Metric deltas:** {delta_str}\n"
785
+
786
+ try:
787
+ if changelog.exists():
788
+ content = changelog.read_text(encoding="utf-8")
789
+ lines = content.splitlines(keepends=True)
790
+ insert_at = 1
791
+ changelog.write_text(
792
+ "".join(lines[:insert_at]) + entry + "".join(lines[insert_at:]),
793
+ encoding="utf-8",
794
+ )
795
+ else:
796
+ changelog.write_text(f"# Changelog\n{entry}", encoding="utf-8")
797
+ except OSError as exc:
798
+ _log.warning("Could not update CHANGELOG.md: %s", exc)
799
+
800
+ # ------------------------------------------------------------------ #
801
+ # Status / metrics dump (used by CLI audit subcommands)
802
+ # ------------------------------------------------------------------ #
803
+
804
def status(self) -> str:
    """Return a human-readable status summary."""
    last = self.last_audit_time.isoformat() if self.last_audit_time else "never"
    paused = "(PAUSED)" if self._is_paused() else ""
    lock_path = self.project_root / _LOCK_FILE
    try:
        if lock_path.exists():
            holder = lock_path.read_text(encoding="utf-8").strip()
            locked = f"(LOCKED by PID {holder})"
        else:
            locked = ""
    except OSError:
        locked = "(LOCKED — could not read lock file)"
    lines = [
        f"Audit daemon status {paused} {locked}".strip(),
        f" Last audit: {last}",
        f" Improvements made: {self.improvement_count}",
        f" Interval: {self.interval_minutes}m",
        f" Project: {self.project_root}",
        f" Metrics file: {self._ts_file}",
    ]
    history = self._load_metric_history()
    if history:
        # Later rows win: keep only the most recent entry per metric.
        last_by_metric: dict[str, dict] = {}
        for row in history:
            last_by_metric[row["metric"]] = row
        lines.append("\n Latest metrics:")
        for name in sorted(last_by_metric):
            row = last_by_metric[name]
            if row["delta"] != 0:
                sign = "+" if row["delta"] >= 0 else ""
                delta_str = f" ({sign}{row['delta']:.4f})"
            else:
                delta_str = ""
            lines.append(f" {name:<28} {row['value']:.4f}{delta_str}")
    return "\n".join(lines)
835
+
836
def metrics_json(self) -> str:
    """Dump full metrics history as JSON."""
    history = self._load_metric_history()
    return json.dumps(history, indent=2)
839
+
840
+ # ------------------------------------------------------------------ #
841
+ # Internal helpers
842
+ # ------------------------------------------------------------------ #
843
+
844
def _is_paused(self) -> bool:
    """True when the pause-marker file is present in the project root."""
    marker = self.project_root / _PAUSE_MARKER
    return marker.exists()