@gaia-minds/assistant-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,912 @@
1
+ #!/usr/bin/env python3
2
+ """Gaia Minds Self-Evolving Agent -- Main Loop.
3
+
4
+ This is the reasoning core of an autonomous agent that:
5
+ 1. Gathers state from the repository and GitHub
6
+ 2. Asks Claude what to do (with Constitution as system constraint)
7
+ 3. Checks alignment of proposed actions
8
+ 4. Executes approved actions
9
+ 5. Writes memory (decisions, lessons, state)
10
+ 6. Evolves by learning from outcomes
11
+
12
+ All reasoning traces are logged to tools/agent-memory/ for transparency.
13
+
14
+ Run locally:
15
+ python3 tools/agent-loop.py # single cycle
16
+ python3 tools/agent-loop.py --mode continuous # loop with interval
17
+ python3 tools/agent-loop.py --dry-run # plan but don't execute
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import argparse
23
+ import json
24
+ import logging
25
+ import os
26
+ import sys
27
+ import time
28
+ from datetime import datetime, timezone
29
+ from pathlib import Path
30
+ from typing import Any, Dict, List, Optional
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Resolve paths and add tools/ to sys.path for sibling imports
34
+ # ---------------------------------------------------------------------------
35
+
36
SCRIPT_DIR = Path(__file__).resolve().parent  # the tools/ directory containing this script
REPO_ROOT = SCRIPT_DIR.parent  # repository root (tools/ is assumed one level below it)
# Make sibling modules (agent_alignment, agent_actions) importable by bare name.
sys.path.insert(0, str(SCRIPT_DIR))
39
+
40
+ # Sibling modules -- imported after path setup
41
+ from agent_alignment import ( # noqa: E402
42
+ AlignmentResult,
43
+ check_alignment,
44
+ classify_risk,
45
+ load_constitution,
46
+ )
47
+ from agent_actions import ( # noqa: E402
48
+ ActionResult,
49
+ RepoState,
50
+ execute_action,
51
+ gather_state,
52
+ )
53
+
54
+ # ---------------------------------------------------------------------------
55
+ # Optional: Anthropic SDK
56
+ # ---------------------------------------------------------------------------
57
+
58
+ try:
59
+ import anthropic
60
+
61
+ _HAS_ANTHROPIC = True
62
+ except ImportError:
63
+ anthropic = None # type: ignore[assignment]
64
+ _HAS_ANTHROPIC = False
65
+
66
+ # Optional: PyYAML
67
+ try:
68
+ import yaml
69
+
70
+ _HAS_YAML = True
71
+ except ImportError:
72
+ yaml = None # type: ignore[assignment]
73
+ _HAS_YAML = False
74
+
75
+ # ---------------------------------------------------------------------------
76
+ # Logging
77
+ # ---------------------------------------------------------------------------
78
+
79
# Log line shape shared by all handlers configured in main().
LOG_FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
LOG_DATE_FORMAT = "%H:%M:%S"  # time-only timestamps keep log lines short

# Module-level logger used throughout this file.
log = logging.getLogger("gaia-agent")
83
+
84
# Defaults for dual-track evolution behavior.
# Each track carries a scheduling weight (relative frequency under the
# weighted_round_robin scheduler in select_active_track) and a whitelist of
# action types enforced per-cycle by action_allowed_in_track.
DEFAULT_TRACKS: Dict[str, Dict[str, Any]] = {
    "assistant": {
        # Higher weight: user-facing work is scheduled more often.
        "weight": 7,
        "description": "Improve user-facing personal assistant behavior and reliability",
        "allowed_actions": [
            "verify_resources",
            "generate_indexes",
            "add_research",
            "add_resource",
            "update_skill",
            "open_issue",
            "comment_on_pr",
            "check_pr_status",
        ],
    },
    "framework": {
        "weight": 3,
        "description": "Improve Gaia self-evolving framework and governance tooling",
        # Note: framework track may create_tool but not add_research/add_resource.
        "allowed_actions": [
            "verify_resources",
            "generate_indexes",
            "create_tool",
            "update_skill",
            "open_issue",
            "comment_on_pr",
            "check_pr_status",
        ],
    },
}

# Defaults for the cycle budget; normalized_budget_policy overlays the
# config file's `budget:` section on top of these and sanity-checks values.
DEFAULT_BUDGET_POLICY: Dict[str, Any] = {
    "user_service_pct": 80,
    "self_improvement_pct": 20,
    "hard_cycle_token_cap": 12000,
}
120
+
121
+
122
+ # ---------------------------------------------------------------------------
123
+ # Config
124
+ # ---------------------------------------------------------------------------
125
+
126
# Config file and memory artifacts all live alongside this script in tools/.
CONFIG_PATH = SCRIPT_DIR / "agent-config.yml"
MEMORY_DIR = SCRIPT_DIR / "agent-memory"
DECISIONS_PATH = MEMORY_DIR / "decisions.jsonl"  # append-only decision audit log
LESSONS_PATH = MEMORY_DIR / "lessons.jsonl"  # append-only lessons learned
STATE_PATH = MEMORY_DIR / "state.json"  # mutable cross-cycle state (cycle counter etc.)
131
+
132
+
133
def load_config(path: Path = CONFIG_PATH) -> Dict[str, Any]:
    """Load agent-config.yml into a configuration mapping.

    Uses PyYAML when available. Otherwise falls back to a minimal regex
    scan that extracts simple scalar keys -- just enough to run.

    Args:
        path: Location of the YAML config file.

    Returns:
        Parsed configuration dict. Always a dict, never None: callers
        chain ``config.get(...)`` on the result, so an empty or non-mapping
        YAML document is normalized to ``{}``.
    """
    text = path.read_text(encoding="utf-8")
    if _HAS_YAML:
        loaded = yaml.safe_load(text)
        # safe_load returns None for an empty file (or a scalar for a
        # degenerate document); normalize so callers can .get() safely.
        return loaded if isinstance(loaded, dict) else {}
    # Minimal fallback -- parse enough to run
    import re

    config: Dict[str, Any] = {}
    # Extract key top-level scalars
    for match in re.finditer(r"^ (\w+):\s*\"?([^\"#\n]+)\"?", text, re.MULTILINE):
        key, val = match.group(1).strip(), match.group(2).strip()
        config[key] = val
    return config
147
+
148
+
149
+ # ---------------------------------------------------------------------------
150
+ # Memory
151
+ # ---------------------------------------------------------------------------
152
+
153
+
154
def _read_jsonl_tail(path: Path, limit: int) -> List[Dict[str, Any]]:
    """Return up to the last *limit* parseable JSON records from a .jsonl file.

    A non-positive *limit* yields an empty list. (The previous inline code
    sliced ``lines[-0:]`` for limit 0, which returned ALL lines -- a bug
    triggered by check_pr_feedback's ``n_decisions=0`` call.)
    """
    records: List[Dict[str, Any]] = []
    if limit <= 0 or not path.exists():
        return records
    lines = path.read_text(encoding="utf-8").strip().splitlines()
    for line in lines[-limit:]:
        line = line.strip()
        if line:
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError:
                pass  # skip corrupt lines rather than losing all memory
    return records


def load_memory(n_decisions: int = 10, n_lessons: int = 10) -> Dict[str, Any]:
    """Load recent decisions, lessons, and state from agent-memory/.

    Args:
        n_decisions: Max number of trailing decision records to load.
        n_lessons: Max number of trailing lesson records to load.

    Returns:
        Dict with keys ``recent_decisions`` (list), ``lessons`` (list) and
        ``state`` (dict, empty if state.json is absent or corrupt).
    """
    memory: Dict[str, Any] = {
        "recent_decisions": _read_jsonl_tail(DECISIONS_PATH, n_decisions),
        "lessons": _read_jsonl_tail(LESSONS_PATH, n_lessons),
        "state": {},
    }

    # State (single JSON document, tolerate corruption by starting fresh)
    if STATE_PATH.exists():
        try:
            memory["state"] = json.loads(STATE_PATH.read_text(encoding="utf-8"))
        except json.JSONDecodeError:
            pass

    return memory
192
+
193
+
194
def log_decision(
    cycle: int,
    action: Dict[str, Any],
    alignment: AlignmentResult,
    outcome: str,
    details: str = "",
    active_track: str = "unknown",
) -> None:
    """Append one decision record to decisions.jsonl.

    Free-text fields (reasoning, details) are truncated so a single
    record stays small and the audit log remains scannable.
    """
    entry = dict(
        timestamp=datetime.now(timezone.utc).isoformat(),
        cycle=cycle,
        track=active_track,
        action=action.get("type", "unknown"),
        action_params=action.get("params", {}),
        reasoning=alignment.reasoning[:500],
        alignment_check="allowed" if alignment.allowed else "denied",
        risk_level=alignment.risk_level,
        outcome=outcome,
        details=details[:500],
    )
    with open(DECISIONS_PATH, "a", encoding="utf-8") as sink:
        sink.write(json.dumps(entry) + "\n")
217
+
218
+
219
def log_lesson(cycle: int, lesson: str, source: str, context: str = "") -> None:
    """Append a lesson to lessons.jsonl.

    ``source`` labels where the lesson came from (e.g. "error",
    "pr_merged", "pr_rejected"); ``context`` is truncated to 300 chars.
    """
    entry = dict(
        timestamp=datetime.now(timezone.utc).isoformat(),
        cycle=cycle,
        lesson=lesson,
        source=source,
        context=context[:300],
    )
    with open(LESSONS_PATH, "a", encoding="utf-8") as sink:
        sink.write(json.dumps(entry) + "\n")
230
+
231
+
232
def update_state(
    cycle: int,
    results: List[ActionResult],
    active_track: str = "unknown",
    budget_policy: Optional[Dict[str, Any]] = None,
) -> None:
    """Merge this cycle's results into state.json.

    Existing state is preserved where possible; a corrupt or missing
    state file simply starts from an empty dict.
    """
    previous: Dict[str, Any] = {}
    if STATE_PATH.exists():
        try:
            previous = json.loads(STATE_PATH.read_text(encoding="utf-8"))
        except json.JSONDecodeError:
            previous = {}

    previous["last_cycle"] = cycle
    previous["last_run"] = datetime.now(timezone.utc).isoformat()
    previous["last_track"] = active_track
    previous["total_actions"] = previous.get("total_actions", 0) + len(results)

    # Count how often each track has been active across all cycles.
    counts = previous.setdefault("track_counts", {})
    counts[active_track] = counts.get(active_track, 0) + 1

    if budget_policy:
        previous["budget_policy"] = budget_policy

    # Tally PRs created this cycle (results whose artifacts carry a pr_url).
    new_prs = sum(1 for outcome in results if outcome.artifacts.get("pr_url"))
    if new_prs:
        previous["total_prs_created"] = previous.get("total_prs_created", 0) + new_prs

    STATE_PATH.write_text(json.dumps(previous, indent=2) + "\n", encoding="utf-8")
261
+
262
+
263
def commit_memory(cycle: int) -> None:
    """Commit memory changes to git.

    Stages tools/agent-memory/, commits only when something is actually
    staged, and logs (rather than raises) any git failure so a broken
    git setup never aborts an agent cycle.
    """
    import subprocess

    try:
        # Stage everything under the memory directory.
        subprocess.run(
            ["git", "add", str(MEMORY_DIR)],
            cwd=str(REPO_ROOT),
            capture_output=True,
            timeout=30,
        )
        # Check if there are staged changes
        # (`git diff --cached --quiet` exits non-zero when the index differs).
        result = subprocess.run(
            ["git", "diff", "--cached", "--quiet", str(MEMORY_DIR)],
            cwd=str(REPO_ROOT),
            capture_output=True,
            timeout=30,
        )
        if result.returncode != 0:  # there are changes
            subprocess.run(
                ["git", "commit", "-m", f"agent: memory update cycle {cycle}"],
                cwd=str(REPO_ROOT),
                capture_output=True,
                timeout=30,
            )
            log.info("Memory committed to git (cycle %d)", cycle)
        else:
            log.debug("No memory changes to commit")
    except Exception as exc:
        # Best-effort by design: memory persistence must not kill the loop.
        log.warning("Could not commit memory: %s", exc)
293
+
294
+
295
def rotate_logs(config: Dict[str, Any]) -> None:
    """Rotate decision and lesson logs if they exceed configured limits.

    Keeps only the newest entries so the JSONL memory files stay bounded.

    Args:
        config: Parsed agent configuration; limits are read from the
            ``memory`` section with safe defaults.
    """
    memory_cfg = config.get("memory", {})
    if not isinstance(memory_cfg, dict):
        # The YAML-less fallback parser in load_config can yield a flat
        # scalar here; fall back to defaults instead of crashing on .get().
        memory_cfg = {}
    max_decisions = memory_cfg.get("max_decisions_log_entries", 500)
    max_lessons = memory_cfg.get("max_lessons_entries", 100)

    for path, limit in [(DECISIONS_PATH, max_decisions), (LESSONS_PATH, max_lessons)]:
        if not path.exists():
            continue
        lines = path.read_text(encoding="utf-8").strip().splitlines()
        if len(lines) > limit:
            # Keep the most recent entries
            path.write_text("\n".join(lines[-limit:]) + "\n", encoding="utf-8")
            log.info("Rotated %s: kept last %d of %d entries", path.name, limit, len(lines))
308
+
309
+
310
def normalized_track_config(config: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
    """Return normalized track configuration with safe defaults.

    Every track from DEFAULT_TRACKS is present in the result; configured
    values override defaults, with weights coerced to ints >= 1 and
    allowed_actions required to be a list.
    """
    configured_tracks = config.get("evolution", {}).get("tracks", {})
    normalized: Dict[str, Dict[str, Any]] = {}
    for track_name, fallback in DEFAULT_TRACKS.items():
        overrides = configured_tracks.get(track_name, {})

        # Weight: coerce to int, fall back on garbage, clamp to >= 1.
        try:
            weight = int(overrides.get("weight", fallback["weight"]))
        except (TypeError, ValueError):
            weight = int(fallback["weight"])
        weight = max(weight, 1)

        # Allowed actions must be a list; otherwise use the default set.
        actions = overrides.get("allowed_actions", fallback["allowed_actions"])
        if not isinstance(actions, list):
            actions = fallback["allowed_actions"]

        normalized[track_name] = {
            "weight": weight,
            "description": overrides.get("description", fallback["description"]),
            "allowed_actions": actions,
        }
    return normalized
332
+
333
+
334
def normalized_budget_policy(config: Dict[str, Any]) -> Dict[str, Any]:
    """Return normalized budget policy with safe defaults.

    Overlays the config's ``budget`` section on DEFAULT_BUDGET_POLICY,
    coercing the known numeric keys to ints and replacing invalid
    (negative / non-positive) values with the defaults.
    """
    raw = config.get("budget", {})
    policy = dict(DEFAULT_BUDGET_POLICY)
    policy.update(raw if isinstance(raw, dict) else {})

    for key in ("user_service_pct", "self_improvement_pct", "hard_cycle_token_cap"):
        try:
            policy[key] = int(policy[key])
        except (TypeError, ValueError):
            policy[key] = DEFAULT_BUDGET_POLICY[key]

    # Percentages must be non-negative; the token cap must be positive.
    if policy["user_service_pct"] < 0:
        policy["user_service_pct"] = DEFAULT_BUDGET_POLICY["user_service_pct"]
    if policy["self_improvement_pct"] < 0:
        policy["self_improvement_pct"] = DEFAULT_BUDGET_POLICY["self_improvement_pct"]
    if policy["hard_cycle_token_cap"] < 1:
        policy["hard_cycle_token_cap"] = DEFAULT_BUDGET_POLICY["hard_cycle_token_cap"]
    return policy
351
+
352
+
353
def select_active_track(
    config: Dict[str, Any],
    cycle_number: int,
) -> str:
    """Choose active track for this cycle.

    Override via GAIA_ACTIVE_TRACK_OVERRIDE=assistant|framework.
    """
    tracks = normalized_track_config(config)

    # An explicit environment override wins, when it names a known track.
    forced = os.environ.get("GAIA_ACTIVE_TRACK_OVERRIDE", "").strip().lower()
    if forced in tracks:
        return forced

    scheduler = config.get("evolution", {}).get("scheduler", "weighted_round_robin")
    names = sorted(tracks.keys())

    if scheduler == "round_robin":
        # Simple rotation through the tracks in alphabetical order.
        return names[(cycle_number - 1) % len(names)]

    # weighted_round_robin: each track appears `weight` times in the rotation.
    schedule = [name for name in names for _ in range(int(tracks[name]["weight"]))]
    if not schedule:
        return "assistant"
    return schedule[(cycle_number - 1) % len(schedule)]
378
+
379
+
380
def action_allowed_in_track(action_type: str, active_track: str, config: Dict[str, Any]) -> bool:
    """Return whether action_type is allowed in the active track policy."""
    track_cfg = normalized_track_config(config).get(active_track)
    if not track_cfg:
        # Unknown track -> deny everything.
        return False
    return action_type in track_cfg.get("allowed_actions", [])
388
+
389
+
390
+ # ---------------------------------------------------------------------------
391
+ # Claude reasoning
392
+ # ---------------------------------------------------------------------------
393
+
394
# System prompt for the planning call: the Constitution text is injected via
# .format(constitution=...) in ask_claude_for_plan. The action-type and
# risk-level lists here must stay in sync with agent_actions / agent_alignment.
SYSTEM_PROMPT_TEMPLATE = """\
You are the reasoning core of Gaia Agent, a self-evolving autonomous agent for \
the Gaia Minds project. Every action you propose must align with the Constitution below.

<constitution>
{constitution}
</constitution>

You have access to these action types:
- verify_resources: Check if resource URLs are still live
- generate_indexes: Regenerate INDEX.md files
- add_research: Create a new research file (params: topic, subcategory, content)
- add_resource: Document a new resource (params: name, subcategory, content)
- create_tool: Create a new tool script (params: name, content, description)
- update_skill: Modify a skill file (params: skill_name, changes)
- open_issue: Open a GitHub issue (params: title, body, labels)
- comment_on_pr: Comment on a PR (params: pr_number, body)
- check_pr_status: Check PR status (params: pr_number)

Risk levels:
- auto_execute: verify_resources, generate_indexes (done immediately)
- auto_pr: add_research, add_resource, create_tool, update_skill (creates PR)
- require_review: anything modifying the agent itself (creates issue for human)
- forbidden: delete_constitution, disable_hooks, merge_own_pr, modify_git_history, bypass_review

IMPORTANT RULES:
- Never propose forbidden actions.
- For research/resource content, write COMPLETE, substantive content — not placeholders.
- Prioritize: stale resources > open issues > stale indexes > research gaps > tool improvements.
- If nothing needs doing, return an empty actions list. Don't invent busywork.
- Learn from past mistakes shown in the memory context.
- Be specific in your reasoning — reference what you observed in the state.
- Respect the active evolution track and only propose actions allowed for that track.
"""
428
+
429
# Per-cycle user prompt: filled by ask_claude_for_plan with JSON dumps of the
# repo state, memory, and policies. Doubled braces ({{ }}) are literal braces
# in the JSON example and survive str.format().
USER_PROMPT_TEMPLATE = """\
Current repository state:
{state_json}

Recent memory (last {n_decisions} decisions):
{decisions_json}

Lessons learned:
{lessons_json}

Agent state:
{agent_state_json}

Active evolution track for this cycle:
{active_track}

Track policy:
{track_policy_json}

Budget policy:
{budget_policy_json}

Based on the current state, what actions should I take this cycle?

Respond with valid JSON only (no markdown, no code fences):
{{
  "reasoning": "Step-by-step reasoning about what needs to be done and why",
  "actions": [
    {{"type": "action_type", "params": {{}}, "reasoning": "why this specific action"}}
  ]
}}

If nothing needs doing, return: {{"reasoning": "explanation", "actions": []}}
"""
463
+
464
+
465
def state_to_summary(state: RepoState) -> Dict[str, Any]:
    """Convert RepoState to a JSON-serializable summary for the prompt.

    Long lists are truncated so the prompt stays within a sane size;
    counts are reported alongside the truncated samples.
    """
    summary: Dict[str, Any] = {}
    summary["open_issues_count"] = len(state.open_issues)
    summary["open_issues"] = state.open_issues[:10]
    summary["open_prs_count"] = len(state.open_prs)
    summary["open_prs"] = state.open_prs[:5]
    summary["recent_commits"] = state.recent_commits[:10]
    summary["stale_resources"] = state.stale_resources[:10]
    summary["stale_indexes"] = state.stale_indexes
    summary["pending_reviews"] = state.pending_reviews[:5]
    summary["agent_pr_history"] = state.agent_pr_history[:10]
    summary["research_topics"] = state.research_topics
    summary["resource_files_count"] = len(state.resource_files)
    summary["tool_files"] = state.tool_files
    return summary
481
+
482
+
483
def ask_claude_for_plan(
    client: Any,
    config: Dict[str, Any],
    state: RepoState,
    memory: Dict[str, Any],
    constitution: str,
    active_track: str,
) -> Dict[str, Any]:
    """Ask Claude to analyze state and propose actions.

    Builds the system prompt (Constitution embedded) and a user prompt
    containing the state summary, recent memory, and track/budget policy,
    then parses the model's JSON reply.

    Args:
        client: Initialized Anthropic client (must not be None here).
        config: Parsed agent configuration (reasoning.* keys used).
        state: Gathered repository state.
        memory: Output of load_memory().
        constitution: Constitution text injected into the system prompt.
        active_track: Track name chosen for this cycle.

    Returns:
        Plan dict with "reasoning" (str) and "actions" (list). On an
        unparseable reply, returns an empty actions list so the cycle
        degrades gracefully instead of crashing.
    """
    reasoning_config = config.get("reasoning", {})
    model = reasoning_config.get("model", "claude-sonnet-4-5-20250929")
    max_tokens = reasoning_config.get("max_tokens", 4096)
    temperature = reasoning_config.get("temperature", 0.3)

    system_prompt = SYSTEM_PROMPT_TEMPLATE.format(constitution=constitution)

    n_decisions = 10
    n_lessons = 10
    track_policy = normalized_track_config(config).get(active_track, {})
    budget_policy = normalized_budget_policy(config)
    user_prompt = USER_PROMPT_TEMPLATE.format(
        state_json=json.dumps(state_to_summary(state), indent=2),
        n_decisions=n_decisions,
        decisions_json=json.dumps(memory.get("recent_decisions", []), indent=2),
        lessons_json=json.dumps(memory.get("lessons", []), indent=2),
        agent_state_json=json.dumps(memory.get("state", {}), indent=2),
        active_track=active_track,
        track_policy_json=json.dumps(track_policy, indent=2),
        budget_policy_json=json.dumps(budget_policy, indent=2),
    )

    log.info("Asking Claude (%s) for a plan...", model)
    log.debug("System prompt: %d chars, User prompt: %d chars", len(system_prompt), len(user_prompt))

    response = client.messages.create(
        model=model,
        max_tokens=max_tokens,
        temperature=temperature,
        system=system_prompt,
        messages=[{"role": "user", "content": user_prompt}],
    )

    # Extract text from response (concatenate all text blocks).
    text = ""
    for block in response.content:
        if hasattr(block, "text"):
            text += block.text

    # Parse JSON from response
    # Handle case where Claude wraps in code fences
    text = text.strip()
    if text.startswith("```"):
        # Remove code fences (drop every fence line, keep the payload).
        lines = text.splitlines()
        lines = [l for l in lines if not l.strip().startswith("```")]
        text = "\n".join(lines)

    try:
        plan = json.loads(text)
    except json.JSONDecodeError as exc:
        log.error("Failed to parse Claude's response as JSON: %s", exc)
        log.error("Raw response: %s", text[:500])
        # Fail soft: an empty plan means "do nothing this cycle".
        plan = {"reasoning": f"Failed to parse response: {exc}", "actions": []}

    actions = plan.get("actions", [])
    log.info(
        "Claude proposed %d action(s): %s",
        len(actions),
        ", ".join(a.get("type", "?") for a in actions) or "(none)",
    )
    log.info("Reasoning: %s", plan.get("reasoning", "")[:200])

    return plan
556
+
557
+
558
+ # ---------------------------------------------------------------------------
559
+ # Main cycle
560
+ # ---------------------------------------------------------------------------
561
+
562
+
563
def run_cycle(
    config: Dict[str, Any],
    client: Any,
    cycle_number: int,
    dry_run: bool = False,
) -> List[ActionResult]:
    """Run one complete agent cycle.

    Sequence: gather state -> load memory -> pick track -> ask Claude for a
    plan -> per action: track-policy gate, alignment check, risk routing,
    then execute -> persist state, rotate logs, commit memory.

    Args:
        config: Parsed agent configuration.
        client: Anthropic client, or None (plan is skipped).
        cycle_number: 1-based cycle counter, also used for track rotation.
        dry_run: When True, actions are planned and logged but not executed,
            and memory is not committed to git.

    Returns:
        List of ActionResult for the actions that actually ran.
    """
    repo_root = str(REPO_ROOT)
    budget_policy = normalized_budget_policy(config)

    # 1. Gather state
    log.info("=== Cycle %d: Gathering state ===", cycle_number)
    state = gather_state(config, repo_root)

    # 2. Load memory
    memory = load_memory()
    active_track = select_active_track(config, cycle_number)
    log.info(
        "Active track: %s (budget split: service=%s%%, self_improvement=%s%%, hard_cycle_token_cap=%s)",
        active_track,
        budget_policy["user_service_pct"],
        budget_policy["self_improvement_pct"],
        budget_policy["hard_cycle_token_cap"],
    )

    # 3. Load constitution
    constitution = load_constitution(repo_root)

    # 4. Ask Claude for a plan
    if client is None:
        log.info("No Anthropic client available -- skipping Claude reasoning (dry-run)")
        plan = {"reasoning": f"No API client (dry-run without SDK/key), active_track={active_track}", "actions": []}
    else:
        plan = ask_claude_for_plan(client, config, state, memory, constitution, active_track)

    actions = plan.get("actions", [])
    if not actions:
        # Still record the cycle so the cycle counter and track stats advance.
        log.info("No actions proposed this cycle.")
        update_state(cycle_number, [], active_track, budget_policy)
        return []

    # 5. Process each action
    results: List[ActionResult] = []
    for i, action in enumerate(actions):
        action_type = action.get("type", "unknown")
        log.info("--- Action %d/%d: %s ---", i + 1, len(actions), action_type)

        # Track-policy gate runs before (and independently of) alignment.
        if not action_allowed_in_track(action_type, active_track, config):
            reason = f"Action '{action_type}' blocked by track policy for '{active_track}'"
            log.warning(reason)
            track_policy_alignment = AlignmentResult(
                allowed=False,
                risk_level="high",
                reasoning=reason,
            )
            log_decision(
                cycle_number,
                action,
                track_policy_alignment,
                "blocked_by_track_policy",
                reason,
                active_track=active_track,
            )
            continue

        # 5a. Check alignment
        alignment = check_alignment(
            action,
            constitution,
            json.dumps(memory.get("recent_decisions", [])[-5:]),
            client=client if _HAS_ANTHROPIC else None,
            model=config.get("reasoning", {}).get("model", "claude-sonnet-4-5-20250929"),
        )

        log.info(
            "Alignment: %s (risk=%s) - %s",
            "ALLOWED" if alignment.allowed else "DENIED",
            alignment.risk_level,
            alignment.reasoning[:150],
        )

        # 5b. Route based on alignment + risk
        if not alignment.allowed:
            log.warning("Action BLOCKED by alignment checker")
            log_decision(cycle_number, action, alignment, "blocked", active_track=active_track)
            continue

        if alignment.risk_level == "forbidden":
            log.warning("Action FORBIDDEN")
            log_decision(cycle_number, action, alignment, "forbidden", active_track=active_track)
            continue

        if alignment.risk_level == "high":
            # High risk is never executed directly: open an issue for a human.
            log.info("High-risk action -> creating issue for human review")
            if not dry_run:
                issue_action = {
                    "type": "open_issue",
                    "params": {
                        "title": f"[Agent] Review request: {action_type}",
                        "body": (
                            f"## Proposed Action\n\n"
                            f"**Type:** `{action_type}`\n\n"
                            f"**Params:**\n```json\n{json.dumps(action.get('params', {}), indent=2)}\n```\n\n"
                            f"**Agent reasoning:** {action.get('reasoning', 'N/A')}\n\n"
                            f"## Alignment Check\n\n"
                            f"**Risk level:** {alignment.risk_level}\n"
                            f"**Reasoning:** {alignment.reasoning}\n\n"
                            f"---\n"
                            f"*This issue was created by gaia-agent cycle {cycle_number}. "
                            f"Please review and provide guidance.*"
                        ),
                        "labels": ["human-input", "agent-contribution"],
                    },
                }
                result = execute_action(issue_action, config, repo_root)
                results.append(result)
            log_decision(
                cycle_number,
                action,
                alignment,
                "deferred_to_human",
                active_track=active_track,
            )
            continue

        # 5c. Execute (low or medium risk)
        if dry_run:
            log.info("[DRY RUN] Would execute: %s", action_type)
            log_decision(cycle_number, action, alignment, "dry_run", active_track=active_track)
            continue

        log.info("Executing action...")
        result = execute_action(action, config, repo_root)
        results.append(result)

        outcome = "success" if result.success else "failed"
        log.info("Result: %s - %s", outcome, result.output[:200] if result.output else result.error[:200])

        log_decision(
            cycle_number,
            action,
            alignment,
            outcome,
            result.output or result.error,
            active_track=active_track,
        )

        # Learn from failures
        if not result.success:
            log_lesson(
                cycle_number,
                f"Action '{action_type}' failed: {result.error[:200]}",
                "error",
                json.dumps(action.get("params", {})),
            )

    # 6. Update state
    update_state(cycle_number, results, active_track, budget_policy)

    # 7. Rotate logs if needed
    rotate_logs(config)

    # 8. Commit memory
    if not dry_run:
        commit_memory(cycle_number)

    return results
730
+
731
+
732
+ # ---------------------------------------------------------------------------
733
+ # Learn from PR feedback
734
+ # ---------------------------------------------------------------------------
735
+
736
+
737
def check_pr_feedback(config: Dict[str, Any], cycle_number: int) -> None:
    """Check if any of our past PRs got merged or rejected, and learn.

    Logs one lesson per PR outcome. A PR already mentioned in an existing
    lesson's context is skipped so feedback is recorded only once.

    Args:
        config: Parsed agent configuration (passed through to gather_state).
        cycle_number: Current cycle, recorded with each lesson.
    """
    repo_root = str(REPO_ROOT)
    state = gather_state(config, repo_root)

    # Load lessons once up front (previously re-read from disk for every PR).
    memory = load_memory(n_decisions=0, n_lessons=100)
    seen_contexts: List[str] = [
        lesson.get("context", "") for lesson in memory.get("lessons", [])
    ]

    for pr in state.agent_pr_history:
        pr_state = pr.get("state", "").upper()
        pr_title = pr.get("title", "")
        pr_number = pr.get("number", "?")
        tag = f"PR #{pr_number}"

        # Skip PRs already learned from, including ones logged earlier in
        # this same pass. NOTE(review): substring match means "PR #1" also
        # matches "PR #12" -- preexisting behavior, kept as-is.
        if any(tag in ctx for ctx in seen_contexts):
            continue

        if pr_state == "MERGED":
            log_lesson(
                cycle_number,
                f"PR #{pr_number} '{pr_title}' was merged successfully",
                "pr_merged",
                tag,
            )
            seen_contexts.append(tag)
            log.info("Learned: PR #%s was merged", pr_number)

        elif pr_state == "CLOSED":
            log_lesson(
                cycle_number,
                f"PR #{pr_number} '{pr_title}' was closed without merge -- review why",
                "pr_rejected",
                tag,
            )
            seen_contexts.append(tag)
            log.info("Learned: PR #%s was rejected", pr_number)
773
+
774
+
775
+ # ---------------------------------------------------------------------------
776
+ # CLI
777
+ # ---------------------------------------------------------------------------
778
+
779
+
780
def main() -> int:
    """CLI entry point.

    Parses arguments, validates SDK/API-key prerequisites (skipped for
    --dry-run), loads config, prepares the memory directory, and runs
    either a single cycle or a continuous loop.

    Returns:
        Process exit code: 0 on success, 1 on configuration errors or when
        any action in a single cycle failed.
    """
    parser = argparse.ArgumentParser(
        description="Gaia Minds Self-Evolving Agent",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "Examples:\n"
            "  python3 tools/agent-loop.py                    # single cycle\n"
            "  python3 tools/agent-loop.py --mode continuous  # loop every hour\n"
            "  python3 tools/agent-loop.py --dry-run -v       # plan without executing\n"
        ),
    )
    parser.add_argument(
        "--config",
        default=str(CONFIG_PATH),
        help="Path to agent-config.yml (default: tools/agent-config.yml)",
    )
    parser.add_argument(
        "--mode",
        choices=["single", "continuous"],
        default=None,
        help="Override cycle mode from config",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Gather state and plan, but don't execute actions",
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Enable debug logging",
    )
    args = parser.parse_args()

    # Set up logging (stderr so stdout stays clean for piping)
    level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=level, format=LOG_FORMAT, datefmt=LOG_DATE_FORMAT, stream=sys.stderr)

    # Check for anthropic SDK (not needed in dry-run)
    if not _HAS_ANTHROPIC and not args.dry_run:
        log.error(
            "The 'anthropic' package is required. Install it:\n"
            "  pip install anthropic\n"
            "Or: pip install -r requirements.txt"
        )
        return 1

    # Check for API key (not needed in dry-run)
    if not os.environ.get("ANTHROPIC_API_KEY") and not args.dry_run:
        log.error(
            "ANTHROPIC_API_KEY environment variable is not set.\n"
            "  export ANTHROPIC_API_KEY='your-key-here'"
        )
        return 1

    # Load config
    config_path = Path(args.config)
    if not config_path.exists():
        log.error("Config file not found: %s", config_path)
        return 1

    config = load_config(config_path)
    log.info("Loaded config: %s v%s", config.get("agent", {}).get("name", "?"), config.get("agent", {}).get("version", "?"))

    # Determine mode (CLI flag wins over config)
    mode = args.mode or config.get("cycle", {}).get("mode", "single")

    # Initialize Anthropic client; None means planning is skipped in run_cycle
    if _HAS_ANTHROPIC and os.environ.get("ANTHROPIC_API_KEY"):
        client = anthropic.Anthropic()
        log.info("Anthropic client initialized (model: %s)", config.get("reasoning", {}).get("model", "?"))
    else:
        client = None
        log.info("No Anthropic client (dry-run or missing SDK/key)")

    # Ensure memory directory exists
    MEMORY_DIR.mkdir(parents=True, exist_ok=True)
    for path in [DECISIONS_PATH, LESSONS_PATH]:
        if not path.exists():
            path.touch()
    if not STATE_PATH.exists():
        STATE_PATH.write_text("{}\n", encoding="utf-8")

    # Load current state to get cycle number (cycles are 1-based)
    try:
        state_data = json.loads(STATE_PATH.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, FileNotFoundError):
        state_data = {}
    cycle_number = state_data.get("last_cycle", 0) + 1

    if mode == "single":
        log.info("Running single cycle (#%d)...", cycle_number)
        # Check PR feedback before planning
        check_pr_feedback(config, cycle_number)
        results = run_cycle(config, client, cycle_number, dry_run=args.dry_run)
        succeeded = sum(1 for r in results if r.success)
        failed = sum(1 for r in results if not r.success)
        log.info("Cycle %d complete: %d succeeded, %d failed", cycle_number, succeeded, failed)
        # Non-zero exit when any action failed, so CI can flag the run.
        return 1 if failed > 0 else 0

    elif mode == "continuous":
        interval = config.get("cycle", {}).get("interval_minutes", 60)
        max_cycles = config.get("cycle", {}).get("max_cycles", 0)  # 0 = unlimited
        log.info("Running continuously (interval=%dm, max_cycles=%s)...", interval, max_cycles or "unlimited")

        try:
            cycles_run = 0
            while True:
                check_pr_feedback(config, cycle_number)
                results = run_cycle(config, client, cycle_number, dry_run=args.dry_run)
                succeeded = sum(1 for r in results if r.success)
                failed = sum(1 for r in results if not r.success)
                log.info("Cycle %d complete: %d succeeded, %d failed", cycle_number, succeeded, failed)

                cycle_number += 1
                cycles_run += 1

                if max_cycles and cycles_run >= max_cycles:
                    log.info("Reached max_cycles (%d). Stopping.", max_cycles)
                    break

                log.info("Sleeping %d minutes until next cycle...", interval)
                time.sleep(interval * 60)

        except KeyboardInterrupt:
            # Ctrl-C is a normal way to stop continuous mode.
            log.info("\nInterrupted by user. Exiting gracefully.")
            return 0

    return 0
909
+
910
+
911
if __name__ == "__main__":
    # sys.exit raises SystemExit with main()'s return code, same as the
    # explicit `raise SystemExit(...)` form.
    sys.exit(main())