agentkernel-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. agentkernel/__init__.py +7 -0
  2. agentkernel/__main__.py +5 -0
  3. agentkernel/agent.py +311 -0
  4. agentkernel/approval/__init__.py +23 -0
  5. agentkernel/approval/base.py +34 -0
  6. agentkernel/approval/cli.py +129 -0
  7. agentkernel/approval/policy.py +58 -0
  8. agentkernel/approval/risk.py +91 -0
  9. agentkernel/approval/sandbox.py +201 -0
  10. agentkernel/budget.py +64 -0
  11. agentkernel/checkpoint.py +50 -0
  12. agentkernel/cli.py +1482 -0
  13. agentkernel/config.py +224 -0
  14. agentkernel/context/__init__.py +17 -0
  15. agentkernel/context/manager.py +216 -0
  16. agentkernel/context/truncate.py +35 -0
  17. agentkernel/cron.py +146 -0
  18. agentkernel/curation.py +183 -0
  19. agentkernel/doctor.py +141 -0
  20. agentkernel/embeddings.py +132 -0
  21. agentkernel/evaluation.py +186 -0
  22. agentkernel/improvement.py +133 -0
  23. agentkernel/insights.py +141 -0
  24. agentkernel/kanban.py +114 -0
  25. agentkernel/knowledge.py +383 -0
  26. agentkernel/loops.py +145 -0
  27. agentkernel/mcp/__init__.py +23 -0
  28. agentkernel/mcp/client.py +181 -0
  29. agentkernel/mcp/config.py +59 -0
  30. agentkernel/mcp/tools.py +96 -0
  31. agentkernel/memory.py +1208 -0
  32. agentkernel/paths.py +73 -0
  33. agentkernel/plugins.py +76 -0
  34. agentkernel/profiles.py +70 -0
  35. agentkernel/progress.py +89 -0
  36. agentkernel/providers/__init__.py +35 -0
  37. agentkernel/providers/_http.py +157 -0
  38. agentkernel/providers/anthropic.py +282 -0
  39. agentkernel/providers/base.py +38 -0
  40. agentkernel/providers/credentials.py +65 -0
  41. agentkernel/providers/local.py +34 -0
  42. agentkernel/providers/openai.py +260 -0
  43. agentkernel/redaction.py +77 -0
  44. agentkernel/semantic_index.py +139 -0
  45. agentkernel/semantic_memory.py +253 -0
  46. agentkernel/skills.py +268 -0
  47. agentkernel/subagent.py +161 -0
  48. agentkernel/telemetry.py +199 -0
  49. agentkernel/templates/README.md +35 -0
  50. agentkernel/templates/SKILL.md +28 -0
  51. agentkernel/templates/eval-suite.toml +22 -0
  52. agentkernel/templates/loop.toml +29 -0
  53. agentkernel/templates/mcp-servers.toml +22 -0
  54. agentkernel/templates/profile.toml +29 -0
  55. agentkernel/templates/tool_module.py +64 -0
  56. agentkernel/tools/__init__.py +5 -0
  57. agentkernel/tools/base.py +100 -0
  58. agentkernel/tools/builtin/__init__.py +37 -0
  59. agentkernel/tools/builtin/checkpoint_tool.py +33 -0
  60. agentkernel/tools/builtin/clarify.py +60 -0
  61. agentkernel/tools/builtin/files.py +221 -0
  62. agentkernel/tools/builtin/kanban_tool.py +100 -0
  63. agentkernel/tools/builtin/search.py +225 -0
  64. agentkernel/tools/builtin/shell.py +67 -0
  65. agentkernel/tools/builtin/todo.py +106 -0
  66. agentkernel/tui/__init__.py +50 -0
  67. agentkernel/tui/app.py +594 -0
  68. agentkernel/types.py +127 -0
  69. agentkernel/worktree.py +64 -0
  70. agentkernel_cli-0.1.0.dist-info/METADATA +426 -0
  71. agentkernel_cli-0.1.0.dist-info/RECORD +74 -0
  72. agentkernel_cli-0.1.0.dist-info/WHEEL +4 -0
  73. agentkernel_cli-0.1.0.dist-info/entry_points.txt +2 -0
  74. agentkernel_cli-0.1.0.dist-info/licenses/LICENSE +201 -0
agentkernel/cli.py ADDED
@@ -0,0 +1,1482 @@
1
+ """CLI entry point (design §16, M4).
2
+
3
+ Wires a live provider, the builtin tools inside a ``LocalSandbox``, a
4
+ ``CliApprover``, and JSONL telemetry into an ``Agent``.
5
+
6
+ Modes:
7
+ - ``agentkernel`` / ``agentkernel repl`` — interactive REPL with per-turn progress
8
+ and slash commands.
9
+ - ``agentkernel run "prompt"`` / ``agentkernel run --file prompt.md`` — single
10
+ non-interactive run, prints the final answer, and exits.
11
+
12
+ The wiring (``build_runtime``) and the loop (``repl`` / ``run_once``) are
13
+ separated so the loop can be tested offline with a scripted provider.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import argparse
19
+ import fnmatch
20
+ import json
21
+ import sys
22
+ from collections.abc import Callable
23
+ from dataclasses import replace
24
+ from pathlib import Path
25
+
26
+ from agentkernel.agent import Agent
27
+ from agentkernel.approval import AutoApprover, CliApprover, Sandbox, make_sandbox
28
+ from agentkernel.budget import BudgetGuard
29
+ from agentkernel.config import Config, resolve_config
30
+ from agentkernel.context import ContextManager, ModelSummarizer
31
+ from agentkernel.embeddings import EmbeddingError, OpenAIEmbeddingProvider
32
+ from agentkernel.knowledge import KnowledgeGraph, make_graph_tools
33
+ from agentkernel.mcp import MCPClient, MCPError, load_mcp_servers, register_mcp_servers
34
+ from agentkernel.mcp.config import MCPServerConfig
35
+ from agentkernel.memory import (
36
+ MemoryStore,
37
+ NoteStore,
38
+ make_memory_store,
39
+ make_memory_tools,
40
+ make_note_store,
41
+ )
42
+ from agentkernel.paths import agent_home, global_config_path
43
+ from agentkernel.profiles import Profile, load_profile
44
+ from agentkernel.progress import ProgressTelemetry
45
+ from agentkernel.providers import ProviderError, make_provider
46
+ from agentkernel.semantic_memory import SemanticSqliteNoteStore
47
+ from agentkernel.skills import DirectorySkillStore, make_skill_tool
48
+ from agentkernel.subagent import make_spawn_tool
49
+ from agentkernel.telemetry import JsonlTelemetry, NullTelemetry
50
+ from agentkernel.tools import ToolRegistry
51
+ from agentkernel.tools.builtin import default_tools
52
+
53
# Printed once by ``repl`` before the first prompt; names the slash commands
# that ``_handle_slash`` dispatches on.
_BANNER = (
    "agentkernel REPL - type your message and press enter. Commands: /exit, "
    "/clear, /system, /profile, /skills, /skill, /tools, /trace, /cost, /memory, /improve."
)
# Prompt string handed to ``input_fn`` for every REPL line.
_PROMPT = "> "
# Bare (non-slash) words that end the REPL; ``repl`` compares them against the
# lowercased input line.
_EXIT_WORDS = {"exit", "quit", ":q"}
59
+
60
+
61
def _make_configured_note_store(config: Config) -> NoteStore:
    """Return the note store selected by ``config``.

    With ``config.semantic_search`` on, a ``SemanticSqliteNoteStore`` backed by
    an OpenAI embedding provider is built; if the embedding provider cannot be
    set up, a warning goes to stderr and the plain notebook from
    ``make_note_store`` is used instead. Both ``build_runtime`` (memory tools)
    and ``run_memory`` (curation) go through here so they agree on the backend.
    """
    if not config.semantic_search:
        return make_note_store(config.memory_notes_path)

    sqlite_suffixes = (".db", ".sqlite", ".sqlite3")
    try:
        embedder = OpenAIEmbeddingProvider.from_config(config)
        db_path = Path(config.memory_notes_path)
        if db_path.suffix.lower() not in sqlite_suffixes:
            # Keep the semantic database next to the configured notebook
            # rather than reusing a non-SQLite file name.
            db_path = db_path.parent / (db_path.stem + ".semantic.db")
        semantic_store = SemanticSqliteNoteStore(
            db_path,
            embedding_provider=embedder,
            lsh_bits=config.semantic_search_lsh_bits,
        )
    except EmbeddingError as exc:
        print(f"Warning: semantic search disabled: {exc}", file=sys.stderr)
        return make_note_store(config.memory_notes_path)
    return semantic_store
81
+
82
+
83
def build_runtime(
    config: Config,
    *,
    mcp_servers: list[MCPServerConfig] | None = None,
    verbose: bool = False,
    budget: BudgetGuard | None = None,
    memory: MemoryStore | None = None,
    sandbox: Sandbox | None = None,
    session_id: str | None = None,
) -> tuple[Agent, JsonlTelemetry, list[MCPClient]]:
    """Construct an Agent, its telemetry, and any MCP clients from config.

    Keys come from env. MCP-discovered tools register into the same registry as
    the builtins — the loop never learns they came from elsewhere (design §13).
    Pass ``sandbox`` to own its lifecycle (e.g. close a DockerSandbox container);
    otherwise one is built from config.

    Args:
        config: Resolved configuration driving every wiring decision below.
        mcp_servers: MCP server configs to connect and register tools from.
        verbose: Forwarded to ``JsonlTelemetry``.
        budget: Optional budget guard handed to the ``Agent``.
        memory: Transcript store to use; built from config when ``None``.
        sandbox: Sandbox to run tools in; built from config when ``None``.
        session_id: Session id forwarded to ``JsonlTelemetry`` (used when
            resuming a session).

    Returns:
        ``(agent, telemetry, mcp_clients)``. The caller closes the telemetry
        and the clients when done.

    Raises:
        Whatever the underlying constructors raise. If wiring fails after the
        MCP clients were started, they (and the telemetry, if already open) are
        closed before the error propagates, because the caller never receives
        a handle to them.
    """
    provider = make_provider(config)
    if sandbox is None:
        sandbox = make_sandbox(
            config.sandbox,
            config.working_dir,
            image=config.sandbox_image,
            network=config.sandbox_network,
        )
    # Filesystem checkpoints (§18.1): when enabled, file tools back up files
    # before editing and a `rollback` tool restores them.
    checkpointer = None
    if config.checkpoints:
        from agentkernel.checkpoint import Checkpointer
        from agentkernel.tools.builtin.checkpoint_tool import rollback_tool

        checkpointer = Checkpointer()

    registry = ToolRegistry()
    for spec in default_tools(
        sandbox,
        config.working_dir,
        max_result_tokens=config.max_tool_result_tokens,
        checkpointer=checkpointer,
    ):
        registry.register(spec)
    if checkpointer is not None:
        registry.register(rollback_tool(checkpointer))

    # In-session tools (§18.4): a planning todo list and a clarify-the-user tool.
    if config.enable_todo:
        from agentkernel.tools.builtin.todo import TodoList, todo_tool

        registry.register(todo_tool(TodoList()))
    if config.enable_clarify:
        from agentkernel.tools.builtin.clarify import clarify_tool

        registry.register(clarify_tool())
    if config.enable_kanban:
        from agentkernel.kanban import Board
        from agentkernel.tools.builtin.kanban_tool import kanban_tool

        registry.register(kanban_tool(Board(config.kanban_path)))

    # Plugin tools (§18.7): user-authored tools auto-loaded from plugins_dir.
    if config.enable_plugins:
        from agentkernel.plugins import load_plugin_tools

        def _warn_plugin(path, exc):
            print(f"[plugin load failed: {path.name}] {exc}", file=sys.stderr)

        for spec in load_plugin_tools(
            config.plugins_dir, working_dir=config.working_dir, on_error=_warn_plugin
        ):
            # Tools registered earlier win; a plugin never shadows them.
            if registry.spec(spec.name) is None:
                registry.register(spec)
            else:
                print(f"[plugin tool skipped: {spec.name!r} already registered]", file=sys.stderr)

    mcp_clients = register_mcp_servers(
        registry, list(mcp_servers or []), log_dir=config.mcp_log_dir
    )

    # From here on live MCP clients exist. Any failure below must release them
    # (and the telemetry once it is open) since nothing is returned on error.
    telemetry: JsonlTelemetry | None = None
    try:
        # Phase 6: expose the knowledge graph as ordinary tools when enabled.
        if config.enable_graph:
            for spec in make_graph_tools(KnowledgeGraph(config.graph_path)):
                registry.register(spec)

        # Phase 3: session transcript memory. Use the injected store if provided,
        # otherwise honor config.memory_store (file/sqlite/memory). Notes are
        # independent of the transcript store; their backend is chosen by
        # _make_configured_note_store.
        if memory is None:
            memory = make_memory_store(
                config.memory_store,
                config.memory_dir or ".agentkernel/memory",
            )

        notes: NoteStore | None = None
        if config.enable_memory_tools:
            notes = _make_configured_note_store(config)
            for spec in make_memory_tools(notes, store=memory):
                registry.register(spec)

        # Phase 4: skills contribute a progressive-disclosure catalog via the
        # context source; the model loads a skill's full body on demand with the
        # use_skill tool (registered only when skills exist).
        context_source = DirectorySkillStore(config.skills_dir, active_skills=config.skills)
        if context_source.available_skills():
            registry.register(make_skill_tool(context_source))

        budget_for_context = provider.context_window - config.output_reserve
        summarizer = None
        if config.summarizer_model:
            summarizer = ModelSummarizer(
                make_provider(replace(config, model=config.summarizer_model))
            )
        context = ContextManager(
            budget=budget_for_context,
            keep_recent_turns=config.keep_recent_turns,
            summarizer=summarizer,
        )
        # `smart` approval consults a cheap risk judge before prompting (§18.1).
        risk_judge = None
        if config.approval_policy == "smart":
            from agentkernel.approval.risk import RiskJudge

            judge_model = config.approval_judge_model or config.summarizer_model or config.model
            risk_judge = RiskJudge(make_provider(replace(config, model=judge_model)))
        approver = CliApprover(
            config.approval_policy,
            allowlist=config.approval_allowlist,
            risk_judge=risk_judge,
        )

        # Sub-agent delegation (design §13): the model can spawn focused children.
        # base_specs snapshots the tools BEFORE spawn so spawn isn't self-recursive
        # except through the explicit, depth-limited spawn tools it creates.
        if config.enable_spawn:
            base_specs = registry.specs()

            def _tool_factory(working_dir: str):
                # Rebuild the builtin toolset bound to a worktree dir (§18.3), with its
                # own sandbox so the child's file/shell tools are isolated there.
                wt_sandbox = make_sandbox(
                    config.sandbox, working_dir,
                    image=config.sandbox_image, network=config.sandbox_network,
                )
                return default_tools(
                    wt_sandbox, working_dir,
                    max_result_tokens=config.max_tool_result_tokens,
                )

            registry.register(
                make_spawn_tool(
                    provider=provider,
                    base_specs=base_specs,
                    approver=approver,
                    config=config,
                    max_depth=config.spawn_max_depth,
                    tool_factory=_tool_factory,
                )
            )

        # A resumed session reuses its id, so telemetry appends to the same trace and
        # the agent's pre-run memory load (§7) pulls that session's transcript.
        telemetry = JsonlTelemetry(
            config.log_dir, config.model, verbose=verbose, session_id=session_id
        )
        agent = Agent(
            provider,
            registry,
            context,
            approver,
            telemetry,
            config,
            budget=budget,
            memory=memory,
            notes=notes,
            context_source=context_source,
        )
    except BaseException:
        # Best-effort cleanup: a failing close() must not mask the error that
        # brought us here, so close errors are deliberately ignored.
        if telemetry is not None:
            try:
                telemetry.close()
            except Exception:
                pass
        for client in mcp_clients:
            try:
                client.close()
            except Exception:
                pass
        raise
    return agent, telemetry, mcp_clients
260
+
261
+
262
def _handle_slash(
    line: str,
    agent: Agent,
    profile: Profile,
    config: Config,
    output_fn: Callable[[str], None],
) -> bool:
    """Process one REPL slash command.

    Args:
        line: The raw input line, starting with ``/``.
        agent: The running agent whose context, registry, telemetry, notes and
            skill source the commands inspect or mutate.
        profile: The active profile; ``/system`` and ``/profile`` mutate it in
            place.
        config: Resolved configuration (profile search dir, ``/improve``).
        output_fn: Sink for every user-visible message.

    Returns:
        ``False`` only for the exit commands, which tells the REPL loop to
        stop; ``True`` for everything else, including unknown commands and
        commands that failed.
    """
    parts = line.split(None, 1)
    cmd = parts[0][1:]
    arg = parts[1] if len(parts) > 1 else ""

    if cmd in ("exit", "quit", "q"):
        return False  # signal to exit the loop

    if cmd == "clear":
        agent.context.clear()
        output_fn("[context cleared]")
        return True

    if cmd == "system":
        if not arg:
            output_fn("[system prompt cleared]")
            profile.system_prompt = None
        else:
            profile.system_prompt = arg
            output_fn(f"[system prompt set: {arg[:60]!r}]")
        return True

    if cmd == "profile":
        if not arg:
            output_fn(f"[active profile: {profile.name}]")
            return True
        loaded = load_profile(
            arg,
            search_dirs=[Path(config.profile_dir)] if config.profile_dir else [],
        )
        if loaded is None:
            output_fn(f"[profile not found: {arg}]")
            return True
        # Copy field by field so the REPL's existing Profile object (already
        # handed to the agent on each turn) picks up the new settings.
        profile.name = loaded.name
        profile.system_prompt = loaded.system_prompt
        profile.tool_filter = loaded.tool_filter
        profile.model_override = loaded.model_override
        profile.rubric = loaded.rubric
        output_fn(f"[profile loaded: {loaded.name}]")
        return True

    if cmd == "skills":
        source = agent.context_source
        available = source.available_skills() if source is not None else []
        if not available:
            output_fn("(no skills found)")
            return True
        active = getattr(source, "active_skills", set())
        for name in available:
            output_fn(f" [{'*' if name in active else ' '}] {name}")
        return True

    if cmd == "skill":
        source = agent.context_source
        if source is None or not hasattr(source, "activate"):
            output_fn("(no skill store)")
            return True
        if not arg:
            output_fn("usage: /skill <name>")
            return True
        state = source.activate(arg)
        output_fn(f"[skill {arg}: {'on' if state else 'off'}]")
        return True

    if cmd == "tools":
        specs = agent.registry.specs()
        if not specs:
            output_fn("(no tools registered)")
        for spec in specs:
            desc = spec.description.splitlines()[0] if spec.description else ""
            output_fn(f" {spec.name}: {desc}")
        return True

    if cmd == "trace":
        path = getattr(agent.telemetry, "path", "unknown")
        output_fn(f"[session trace: {path}]")
        return True

    if cmd == "cost":
        telemetry = agent.telemetry
        total = getattr(telemetry, "cumulative_cost", None)
        usage = getattr(telemetry, "cumulative_usage", None)
        if total is None:
            output_fn("[session cost: not tracked]")
        elif usage is None:
            # Cost is tracked but token usage is not: report what we have
            # instead of failing on the missing usage object.
            output_fn(f"[session cost: ${total:.6f}]")
        else:
            output_fn(
                f"[session cost: ${total:.6f} | in={usage.input_tokens} "
                f"out={usage.output_tokens}]"
            )
        return True

    if cmd == "memory":
        notes = getattr(agent, "notes", None)
        if notes is None:
            output_fn("(memory tools are not enabled)")
            return True
        _handle_memory_slash(notes, arg, output_fn)
        return True

    if cmd == "improve":
        trace_arg = arg.strip() if arg else ""
        trace_path: str | None = trace_arg or None
        if not trace_path:
            # No explicit trace given: fall back to the current session's trace.
            trace_path = getattr(agent.telemetry, "path", None)
            if trace_path:
                trace_path = str(trace_path)
        try:
            # The result of run_improve is deliberately not returned: this
            # function's return value means "keep the REPL running", and a
            # falsy success code (e.g. 0) would otherwise end the session.
            run_improve(config, trace=trace_path, output_fn=output_fn)
        except Exception as exc:
            output_fn(f"[improve error] {exc}")
        return True

    output_fn(f"[unknown command: /{cmd}]")
    return True


def _handle_memory_slash(notes, arg: str, output_fn: Callable[[str], None]) -> None:
    """Run one ``/memory`` subcommand (list, delete, export, reindex) on ``notes``."""
    subparts = arg.split(None, 1)
    sub = subparts[0].lower() if subparts else "list"
    subarg = subparts[1] if len(subparts) > 1 else ""

    if sub == "list":
        # A non-numeric limit silently falls back to the default of 20.
        limit = int(subarg) if subarg.isdigit() else 20
        all_notes = notes.recent(limit)
        if not all_notes:
            output_fn("(no memory notes)")
            return
        for n in all_notes:
            tag_part = f" [tags: {', '.join(n.tags)}]" if n.tags else ""
            output_fn(f" [{n.note_id}] {n.text}{tag_part}")
        return

    if sub == "delete":
        if not subarg or not subarg.isdigit():
            output_fn("usage: /memory delete <note_id>")
            return
        removed = notes.forget(note_id=int(subarg))
        if removed:
            output_fn(f"[deleted note {subarg}]")
        else:
            output_fn(f"[note {subarg} not found]")
        return

    if sub == "export":
        # Default destination: the notebook path with a .md suffix.
        dest = subarg or str(Path(notes.path).with_suffix(".md"))
        path = notes.export(dest)
        output_fn(f"[exported {len(notes.all())} notes to {path}]")
        return

    if sub == "reindex":
        # Only stores that expose reindex_embeddings support this.
        if hasattr(notes, "reindex_embeddings"):
            count = notes.reindex_embeddings()
            output_fn(f"[reindexed {count} note(s)]")
        else:
            output_fn("(semantic search is not enabled for this notebook)")
        return

    output_fn("usage: /memory [list [limit]|delete <note_id>|export [path]|reindex]")
420
+
421
+
422
def repl(
    agent: Agent,
    *,
    config: Config | None = None,
    input_fn: Callable[[str], str] = input,
    output_fn: Callable[[str], None] = print,
    stream_fn: Callable[[str], None] | None = None,
) -> int:
    """Interactive chat loop over a single Agent; context carries across turns.

    Lines are read with ``input_fn``. Empty lines are ignored, the bare exit
    words and the slash exit commands end the loop, other ``/`` lines go to
    ``_handle_slash``, and everything else is sent to ``agent.run``. Provider
    errors are reported and the loop continues.

    When ``stream_fn`` is given and the config's ``stream`` flag is on, model
    text is forwarded to ``stream_fn`` as it arrives; if anything was streamed
    the final answer is not printed again (only a trailing newline is sent).

    Returns:
        Always ``0``.
    """
    settings = config or agent.config
    live = stream_fn is not None and getattr(settings, "stream", True)
    output_fn(_BANNER)
    active_profile = Profile(name="default")

    while True:
        try:
            entry = input_fn(_PROMPT).strip()
        except (EOFError, KeyboardInterrupt):
            output_fn("")  # newline after ^D / ^C
            return 0

        if not entry:
            continue
        if entry.lower() in _EXIT_WORDS:
            return 0
        if entry.startswith("/"):
            if _handle_slash(entry, agent, active_profile, settings, output_fn):
                continue
            return 0

        # Per-turn flag: set as soon as any text was streamed for this message.
        got_text = [False]

        def forward(text: str, _flag=got_text) -> None:
            _flag[0] = True
            stream_fn(text)  # type: ignore[misc]

        callback = forward if live else None
        try:
            reply = agent.run(entry, profile=active_profile, on_text=callback)
        except ProviderError as exc:
            output_fn(f"[provider error] {exc}")
            continue

        if live and got_text[0]:
            stream_fn("\n")  # type: ignore[misc]
        else:
            output_fn(reply)
470
+
471
+
472
def run_once(
    agent: Agent,
    prompt: str,
    *,
    profile: Profile | None = None,
    output_fn: Callable[[str], None] = print,
    stream_fn: Callable[[str], None] | None = None,
    config: Config | None = None,
) -> int:
    """Run one non-interactive turn and emit the answer.

    With ``stream_fn`` set and the config's ``stream`` flag on, text is pushed
    to ``stream_fn`` as it arrives and closed with a newline; if nothing was
    streamed (or streaming is off) the final answer goes to ``output_fn``.

    Returns:
        ``0`` on success, ``1`` when the provider raised ``ProviderError``
        (the error is reported through ``output_fn``).
    """
    settings = config or agent.config
    live = stream_fn is not None and getattr(settings, "stream", True)
    got_text = [False]

    def forward(text: str) -> None:
        got_text[0] = True
        stream_fn(text)  # type: ignore[misc]

    callback = forward if live else None
    try:
        reply = agent.run(prompt, profile=profile, on_text=callback)
    except ProviderError as exc:
        output_fn(f"[provider error] {exc}")
        return 1

    if live and got_text[0]:
        stream_fn("\n")  # type: ignore[misc]
        return 0
    output_fn(reply)
    return 0
502
+
503
+
504
# kind -> (template filename, destination path relative to project root).
# {name} in the destination is filled with the asset name.
_NEW_KINDS: dict[str, tuple[str, str]] = {
    "skill": ("SKILL.md", "skills/{name}/SKILL.md"),
    "profile": ("profile.toml", "profiles/{name}.toml"),
    "loop": ("loop.toml", "loops/{name}.toml"),
    "eval": ("eval-suite.toml", "evals/{name}.toml"),
}


def _project_templates_dir(start: Path | None = None) -> Path | None:
    """Return the nearest ``templates/`` directory walking up from ``start`` (default: cwd)."""
    here = (start or Path.cwd()).resolve()
    for directory in (here, *here.parents):
        candidate = directory / "templates"
        if candidate.is_dir():
            return candidate
    return None


def _bundled_templates_dir() -> Path | None:
    """Return the ``templates/`` directory shipped next to this module, if present."""
    bundled = Path(__file__).parent / "templates"
    return bundled if bundled.is_dir() else None


def _find_templates_dir(start: Path | None = None) -> Path | None:
    """Locate the templates/ directory: nearest one walking up from ``start``,
    else the copy bundled inside the installed package (so `new` works after a
    global install, not just from a checkout)."""
    found = _project_templates_dir(start)
    return found if found is not None else _bundled_templates_dir()


def run_new(
    kind: str,
    name: str,
    *,
    force: bool = False,
    templates_dir: Path | None = None,
    project_root: Path | None = None,
    output_fn: Callable[[str], None] = print,
) -> int:
    """Scaffold a skill/profile/loop/eval from a template (§18.8 roadmap).

    Args:
        kind: One of the keys of ``_NEW_KINDS``.
        name: Asset name; substituted for ``{{name}}`` in the template and for
            ``{name}`` in the destination path. Path separators, ``..`` and a
            leading dot are rejected.
        force: Overwrite an existing destination file.
        templates_dir: Directory holding the templates; discovered when omitted.
        project_root: Directory the destination path is relative to. When
            omitted it is the parent of the templates directory, except when
            only the package-bundled templates were found: then it is the
            current working directory, so new assets are never written into the
            installed package.
        output_fn: Sink for status and error messages.

    Returns:
        ``0`` when the file was created, ``1`` on any validation failure.
    """
    if kind not in _NEW_KINDS:
        output_fn(f"[unknown kind: {kind}] choose one of {', '.join(_NEW_KINDS)}")
        return 1
    if not name or any(sep in name for sep in ("/", "\\", "..")) or name.startswith("."):
        output_fn(f"[invalid name: {name!r}] use a simple kebab-case name")
        return 1

    templates = templates_dir
    from_bundle = False
    if templates is None:
        templates = _project_templates_dir()
        if templates is None:
            templates = _bundled_templates_dir()
            from_bundle = templates is not None
    if templates is None:
        output_fn("[no templates/ directory found] run this inside an agentkernel project")
        return 1
    template_file, dest_pattern = _NEW_KINDS[kind]
    template_path = templates / template_file
    if not template_path.is_file():
        output_fn(f"[template missing: {template_path}]")
        return 1

    if project_root is not None:
        root = project_root
    elif from_bundle:
        # The bundled templates live inside the installed package; its parent
        # is the package directory, not the user's project.
        root = Path.cwd()
    else:
        root = templates.parent
    dest = root / dest_pattern.format(name=name)
    if dest.exists() and not force:
        output_fn(f"[exists: {dest}] pass --force to overwrite")
        return 1

    content = template_path.read_text(encoding="utf-8").replace("{{name}}", name)
    dest.parent.mkdir(parents=True, exist_ok=True)
    dest.write_text(content, encoding="utf-8")
    output_fn(f"[created {kind}: {dest}]")
    return 0
565
+
566
+
567
# Starter content written by ``run_init`` to a project's ``agentkernel.toml``.
_PROJECT_CONFIG_TEMPLATE = """\
# agentkernel project config. Global defaults live in ~/.agentkernel/config.toml
# (or $AGENTKERNEL_HOME); keys here override them for this project.
# API keys come ONLY from the environment, never this file.

provider = "anthropic" # "anthropic" | "openai" | "local"
model = "claude-sonnet-4-6"
# base_url = "http://localhost:1234/v1" # for provider = "local" (LM Studio, Ollama, vLLM)

approval_policy = "always_ask" # always_ask | auto_allow | deny_mutations | smart

# Opt into the higher-level capabilities you want:
# enable_memory_tools = true # remember/recall long-term facts
# enable_spawn = true # let the model delegate to sub-agents
# skills = ["code-review"] # pin skills from skills_dir
"""

# Starter content written by ``run_init`` to the user-global config file when
# ``global_config`` is requested.
_GLOBAL_CONFIG_TEMPLATE = """\
# agentkernel user-global config (applies to every project unless overridden by
# a project agentkernel.toml). API keys come ONLY from the environment.

provider = "anthropic"
model = "claude-sonnet-4-6"
approval_policy = "always_ask"
"""
592
+
593
+
594
def run_init(
    *,
    target_dir: str = ".",
    global_config: bool = False,
    force: bool = False,
    output_fn: Callable[[str], None] = print,
) -> int:
    """Write a starter config file.

    The destination is the user-global config path when ``global_config`` is
    set, otherwise ``agentkernel.toml`` inside ``target_dir``. An existing file
    is left untouched unless ``force`` is given.

    Returns:
        ``0`` after writing the file, ``1`` when it already exists and
        ``force`` is off.
    """
    if not global_config:
        destination = Path(target_dir).resolve() / "agentkernel.toml"
        starter = _PROJECT_CONFIG_TEMPLATE
    else:
        destination = global_config_path(agent_home())
        starter = _GLOBAL_CONFIG_TEMPLATE

    already_there = destination.exists()
    if already_there and not force:
        output_fn(f"[exists: {destination}] pass --force to overwrite")
        return 1

    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(starter, encoding="utf-8")
    output_fn(f"[created {destination}]")
    next_step = (
        "Set provider/model and export your API key env var, then run: "
        'agentkernel run "..."'
    )
    output_fn(next_step)
    return 0
619
+
620
+
621
def run_sessions(
    config: Config,
    action: str,
    session_id: str | None,
    *,
    output_fn: Callable[[str], None] = print,
) -> int:
    """List, show, or delete saved sessions (§18.2). Resume one with --resume.

    ``list`` prints every session id with its message count and a preview of
    the first non-empty user message. ``show`` prints each message of one
    session. Any other action deletes the given session.

    Returns:
        ``0`` on success; ``1`` when no store is configured, a session id is
        required but missing, or ``show`` finds no messages.
    """
    directory = config.memory_dir
    if not directory:
        # No explicit memory dir: use a "memory" folder beside the log dir.
        directory = str(Path(config.log_dir).parent / "memory")
    store = make_memory_store(config.memory_store or "file", directory)
    if store is None:
        output_fn("[no memory store configured]")
        return 1

    if action == "list":
        session_ids = store.list_sessions()
        if not session_ids:
            output_fn("(no saved sessions)")
            return 0
        for sid in session_ids:
            history = store.load(sid)
            opener = ""
            for msg in history:
                if msg.role == "user" and msg.content:
                    opener = msg.content
                    break
            preview = opener
            if len(opener) > 60:
                preview = opener[:60] + "…"
            output_fn(f" {sid} ({len(history)} msgs) {preview}")
        output_fn("\nResume one with: agentkernel --resume <id>")
        return 0

    if not session_id:
        output_fn(f"usage: agentkernel sessions {action} <session_id>")
        return 1

    if action != "show":
        # Everything that is not list/show is treated as delete.
        store.delete(session_id)
        output_fn(f"[deleted session {session_id}]")
        return 0

    history = store.load(session_id)
    if not history:
        output_fn(f"[no session {session_id!r}]")
        return 1
    for msg in history:
        if msg.content:
            body = msg.content
        elif msg.tool_results:
            body = f"[{len(msg.tool_results)} tool result(s)]"
        else:
            body = ""
        output_fn(f"{msg.role}: {body}")
    return 0
670
+
671
+
672
def run_background(
    prompt: str,
    *,
    config_path: str | None = None,
    cwd: str | None = None,
    log_dir: str = ".agentkernel/traces",
    spawn=None,
    output_fn: Callable[[str], None] = print,
) -> int:
    """Start ``agentkernel run <prompt>`` as a detached child process (§18.2).

    The child's stdout and stderr go to a freshly named ``.out`` file in a
    ``background`` directory that sits beside ``log_dir``. ``spawn`` is a test
    seam called as ``spawn(argv, stdout=handle)``; when omitted, a
    ``subprocess.Popen`` is used with detach flags chosen per platform.

    Returns:
        ``0`` once the child was launched, ``1`` for a blank prompt.
    """
    import subprocess
    import sys
    import uuid

    if not prompt.strip():
        output_fn("[background] nothing to run (empty prompt)")
        return 1

    sink_dir = Path(log_dir).parent / "background"
    sink_dir.mkdir(parents=True, exist_ok=True)
    out_path = sink_dir / f"{uuid.uuid4().hex[:8]}.out"

    config_args = ["--config", config_path] if config_path else []
    # -C lets the detached child resolve config from the project directory.
    cwd_args = ["-C", cwd] if cwd else []
    argv = [sys.executable, "-m", "agentkernel", *config_args, *cwd_args, "run", prompt]

    def _default_spawn(args, *, stdout):
        if sys.platform == "win32":
            detach = {
                "creationflags": (
                    subprocess.DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP
                )
            }
        else:
            detach = {"start_new_session": True}
        return subprocess.Popen(
            args,
            stdout=stdout,
            stderr=subprocess.STDOUT,
            stdin=subprocess.DEVNULL,
            **detach,
        )

    launcher = spawn or _default_spawn
    # The child holds its own copy of the fd, so ours is closed on leaving
    # the with-block whether or not the launch succeeded.
    with out_path.open("w", encoding="utf-8") as handle:
        proc = launcher(argv, stdout=handle)

    pid = getattr(proc, "pid", "?")
    output_fn(f"[background] started (pid {pid}); output -> {out_path}")
    return 0
729
+
730
+
731
def run_kanban(
    config: Config,
    action: str,
    rest: list[str],
    *,
    output_fn: Callable[[str], None] = print,
) -> int:
    """Inspect and manage the shared work board from the CLI (§18.3).

    Args:
        config: Supplies ``kanban_path``, the board's storage location.
        action: ``list``, ``add``, ``show``, ``complete`` or ``block``; any
            other value removes the task.
        rest: Remaining positional words: the title for ``add``, otherwise the
            task id followed (for ``block``) by the reason.
        output_fn: Sink for listings and status messages.

    Returns:
        ``0`` on success, ``1`` for a usage error or an unknown task id.
    """
    from agentkernel.kanban import Board, render_task

    board = Board(config.kanban_path)

    if action == "list":
        tasks = board.list()
        if not tasks:
            output_fn("(board is empty)")
            return 0
        for t in tasks:
            output_fn(f" {render_task(t)}")
        return 0
    if action == "add":
        if not rest:
            output_fn('usage: agentkernel kanban add "<title>"')
            return 1
        task = board.add(" ".join(rest))
        output_fn(f"[added {task.id}: {task.title}]")
        return 0

    # Every remaining action operates on a single task id.
    if not rest:
        output_fn(f"usage: agentkernel kanban {action} <task_id>")
        return 1
    task_id = rest[0]
    if action == "show":
        task = board.get(task_id)
        if task is None:
            output_fn(f"[no task {task_id}]")
            return 1
        output_fn(render_task(task))
        for note in task.notes:
            output_fn(f" - {note}")
        return 0

    # The past-tense label is spelled out per action; appending "d" to the
    # action name produced "blockd" for `block`.
    if action == "complete":
        ok = board.complete(task_id) is not None
        done_label = "completed"
    elif action == "block":
        ok = board.block(task_id, " ".join(rest[1:])) is not None
        done_label = "blocked"
    else:  # remove
        # Read the board once so the before/after comparison uses one snapshot.
        existing = board.list()
        remaining = [t for t in existing if t.id != task_id]
        ok = len(remaining) != len(existing)
        if ok:
            board._write(remaining)
        done_label = "removed"
    output_fn(f"[{done_label} {task_id}]" if ok else f"[no task {task_id}]")
    return 0 if ok else 1
783
+
784
+
785
def _cron_run_one(config: Config, prompt: str) -> str:
    """Execute a single cron prompt in a freshly built runtime.

    A new sandbox is created for the job and handed to ``build_runtime``. The
    telemetry and MCP clients are closed after the run, and the sandbox is
    closed last, even when building or running fails.

    Returns:
        The agent's answer for ``prompt``.
    """
    job_sandbox = make_sandbox(
        config.sandbox,
        config.working_dir,
        image=config.sandbox_image,
        network=config.sandbox_network,
    )
    try:
        runtime = build_runtime(config, sandbox=job_sandbox)
        agent, trace, mcp_clients = runtime
        try:
            answer = agent.run(prompt)
        finally:
            trace.close()
            for client in mcp_clients:
                client.close()
        return answer
    finally:
        job_sandbox.close()
801
+
802
+
803
def run_cron(
    config: Config,
    action: str,
    rest: list[str],
    *,
    output_fn: Callable[[str], None] = print,
    run_fn: Callable[[str], str] | None = None,
) -> int:
    """Manage and run scheduled jobs (§18.2).

    ``list`` prints the stored jobs, ``add <schedule> <prompt...>`` stores a
    new one, ``remove <id>`` deletes one, ``run <id>`` executes one now and
    marks it as run, and any other action runs every job that is due.

    ``run_fn`` maps a prompt to an answer; when omitted each prompt is run
    through ``_cron_run_one`` with this ``config``.

    Returns:
        ``0`` on success, ``1`` for usage errors, an invalid schedule, or an
        unknown job id.
    """
    from agentkernel.cron import JobStore, run_due_jobs

    def _fresh_runtime_runner(prompt: str) -> str:
        return _cron_run_one(config, prompt)

    store = JobStore(config.cron_path)
    runner = run_fn or _fresh_runtime_runner

    if action == "list":
        jobs = store.list()
        if not jobs:
            output_fn("(no scheduled jobs)")
            return 0
        for job in jobs:
            state = " [disabled]" if not job.enabled else ""
            last = job.last_run or "never"
            preview = job.prompt
            if len(preview) > 50:
                preview = preview[:50] + "…"
            output_fn(f" {job.id} every {job.schedule}{state} (last: {last}) {preview}")
        return 0

    if action == "add":
        if len(rest) < 2:
            output_fn('usage: agentkernel cron add <schedule> "<prompt>"')
            return 1
        schedule = rest[0]
        prompt = " ".join(rest[1:])
        try:
            job = store.add(schedule, prompt)
        except ValueError as exc:
            output_fn(f"[invalid schedule] {exc}")
            return 1
        output_fn(f"[added job {job.id}: every {job.schedule}]")
        return 0

    if action == "remove" or action == "run":
        if not rest:
            output_fn(f"usage: agentkernel cron {action} <job_id>")
            return 1
        job_id = rest[0]

        if action == "remove":
            if store.remove(job_id):
                output_fn(f"[removed {job_id}]")
                return 0
            output_fn(f"[no job {job_id}]")
            return 1

        job = store.get(job_id)
        if job is None:
            output_fn(f"[no job {job_id}]")
            return 1
        # Mark the job only after the runner returned without raising.
        answer = runner(job.prompt)
        store.mark_run(job_id)
        output_fn(answer)
        return 0

    # action == "tick": run everything due once.
    finished = run_due_jobs(store, runner)
    if not finished:
        output_fn("(nothing due)")
        return 0
    for job_id, answer in finished:
        output_fn(f"[{job_id}] {answer}")
    return 0
868
+
869
+
870
def run_memory(
    config: Config,
    action: str,
    *,
    session: str | None = None,
    output_fn: Callable[[str], None] = print,
) -> int:
    """Curate long-term memory: extract facts from a session, or consolidate.

    These are best-effort harness operations over the configured note store; no
    sandbox, MCP, or tools are needed.

    ``consolidate`` asks the curator to consolidate the note store and reports
    the before/after note counts. ``extract`` loads one saved session
    (``session``, or the only saved session when none is given) and reports
    the facts the curator added. The curator model is
    ``config.memory_curator_model``, falling back to
    ``config.summarizer_model`` and then ``config.model``.

    Returns 0 on success; 1 when there is no session to extract from, the
    choice of session is ambiguous, the session has no messages, or ``action``
    is not recognised.
    """
    from agentkernel.curation import MemoryCurator

    notes = _make_configured_note_store(config)
    curator_model = config.memory_curator_model or config.summarizer_model or config.model
    provider = make_provider(replace(config, model=curator_model))
    curator = MemoryCurator(notes, provider)

    if action == "consolidate":
        result = curator.consolidate()
        output_fn(
            f"Consolidated memory: {result.before} -> {result.after} note(s) "
            f"({result.removed} merged/removed)."
        )
        return 0

    if action == "extract":
        # Resolve the session directory the same way ``main`` does when it
        # creates the store that sessions are saved to; a hard-coded relative
        # default would point elsewhere whenever log_dir is customised.
        memory_dir = config.memory_dir or str(Path(config.log_dir).parent / "memory")
        store = make_memory_store(config.memory_store or "file", memory_dir)
        target = session
        if target is None:
            # No explicit session: only proceed when the choice is unambiguous.
            sessions = store.list_sessions()
            if not sessions:
                output_fn("[no saved sessions to extract from]")
                return 1
            if len(sessions) > 1:
                output_fn(
                    "[multiple sessions; pass --session <id>. Available: "
                    + ", ".join(sessions)
                    + "]"
                )
                return 1
            target = sessions[0]
        messages = store.load(target)
        if not messages:
            output_fn(f"[no messages in session {target}]")
            return 1
        result = curator.extract(messages)
        output_fn(
            f"Extracted {len(result.added)} new fact(s) from session {target} "
            f"({result.skipped_duplicates} duplicate(s) skipped)."
        )
        for note in result.added:
            output_fn(f" + {note.text}")
        return 0

    output_fn(f"[unknown memory action: {action}]")
    return 1
929
+
930
+
931
def run_improve(
    config: Config,
    *,
    trace: str | None = None,
    output_fn: Callable[[str], None] = print,
) -> int:
    """Analyze one session trace and record an improvement note (Phase 7).

    Uses ``trace`` when given, otherwise whatever ``latest_trace`` finds in
    ``config.log_dir``. Returns 0 after printing the suggestion and the path
    it was written to; returns 1 when no trace is available or the provider
    reports an error.
    """
    from agentkernel.improvement import SelfImprover

    reflector = SelfImprover(make_provider(config), config.improvements_dir)

    chosen_trace = trace if trace else reflector.latest_trace(config.log_dir)
    if chosen_trace is None:
        output_fn(f"[no trace found in {config.log_dir}]")
        return 1

    try:
        note = reflector.analyze_trace(chosen_trace)
    except ProviderError as exc:
        output_fn(f"[provider error] {exc}")
        return 1
    else:
        output_fn(note.suggestion)
        output_fn(f"[improvement written to {note.output_path}]")
        return 0
953
+
954
+
955
def run_eval(
    config: Config,
    suite_path: str,
    *,
    judge_model: str | None = None,
    output_path: str | None = None,
    case_filter: list[str] | None = None,
    output_fn: Callable[[str], None] = print,
) -> int:
    """Run an eval suite: agent answers each case, a judge scores it (Phase 5).

    ``case_filter`` optionally restricts the run to cases whose name matches
    at least one of the given case-sensitive glob patterns. The judge is a
    provider built for ``judge_model`` (or ``config.judge_model``); when
    neither is set the agent's own provider scores the answers. When
    ``output_path`` is given the summary is also written there as JSON.

    The sandbox, telemetry and MCP clients are released even when runtime or
    judge construction fails, or when the suite itself raises.

    Returns 0 only if every case passes, so it doubles as a CI gate. Returns 1
    when the suite is empty or the filter matches nothing.
    """
    from agentkernel.evaluation import Evaluator, load_eval_suite

    default_rubric, cases = load_eval_suite(suite_path)
    effective_default = config.eval_rubric or default_rubric
    if not cases:
        output_fn("[no cases in suite]")
        return 1

    if case_filter:
        # Drop repeated patterns while keeping the order they were given in.
        case_filter = list(dict.fromkeys(case_filter))
        cases = [
            c
            for c in cases
            if any(fnmatch.fnmatchcase(c.name, pat) for pat in case_filter)
        ]
        if not cases:
            output_fn(f"[no cases matched filter: {case_filter!r}]")
            return 1

    sandbox = make_sandbox(
        config.sandbox, config.working_dir,
        image=config.sandbox_image, network=config.sandbox_network,
    )
    try:
        base_agent, telemetry, mcp_clients = build_runtime(config, sandbox=sandbox)
        try:

            def agent_factory() -> Agent:
                # Each call gets its own context manager; provider, registry
                # and context source are shared with the base agent.
                context = ContextManager(
                    budget=base_agent.provider.context_window - config.output_reserve,
                    keep_recent_turns=config.keep_recent_turns,
                )
                return Agent(
                    base_agent.provider,
                    base_agent.registry,
                    context,
                    AutoApprover(config.approval_policy),  # non-interactive during eval
                    NullTelemetry(),
                    config,
                    context_source=base_agent.context_source,
                )

            judge_model = judge_model or config.judge_model
            judge = (
                make_provider(replace(config, model=judge_model))
                if judge_model
                else base_agent.provider
            )
            evaluator = Evaluator(
                agent_factory, judge,
                default_rubric=effective_default, pass_threshold=config.eval_threshold,
            )
            summary = evaluator.run_suite(cases)
        finally:
            telemetry.close()
            for client in mcp_clients:
                client.close()
    finally:
        # Runs even if build_runtime itself failed, so the sandbox never leaks.
        sandbox.close()

    for result in summary.results:
        mark = "PASS" if result.passed else "FAIL"
        output_fn(f" [{mark}] {result.name} score={result.score:.2f} {result.reasoning}")
    output_fn(
        f"{summary.passed}/{summary.total} passed "
        f"pass_rate={summary.pass_rate:.0%} mean_score={summary.mean_score:.2f}"
    )
    if output_path:
        Path(output_path).write_text(
            json.dumps(summary.to_dict(), indent=2), encoding="utf-8"
        )
        output_fn(f"[report written to {output_path}]")
    return 0 if summary.passed == summary.total else 1
1039
+
1040
+
1041
def run_loop(
    config: Config,
    *,
    loop_file: str | None = None,
    skill: str | None = None,
    max_iterations: int | None = None,
    check: str | None = None,
    streak: int | None = None,
    output_fn: Callable[[str], None] = print,
) -> int:
    """Run a loop-engineering workflow until its stopping condition (Phase 4+).

    The loop comes from ``loop_file`` when given, otherwise it is built from
    ``skill``. For a loop file, ``max_iterations``, ``check`` and ``streak``
    override the loaded values only when they are not ``None``. For a skill
    loop they are passed straight through, with ``max_iterations`` defaulting
    to 5 and ``streak`` to 1.

    The sandbox, telemetry and MCP clients are released on every exit path,
    including failures while building the runtime or loading the loop.

    Returns 0 if the loop succeeded (reached its success streak), else 1. Also
    returns 1 when neither ``loop_file`` nor ``skill`` is given or the skill
    cannot be found.
    """
    from agentkernel.loops import LoopRunner, load_loop, loop_from_skill

    if not loop_file and not skill:
        # Nothing to run: say so before paying for a sandbox and a runtime.
        output_fn("[loop requires --file or --skill]")
        return 1

    sandbox = make_sandbox(
        config.sandbox, config.working_dir,
        image=config.sandbox_image, network=config.sandbox_network,
    )
    try:
        base_agent, telemetry, mcp_clients = build_runtime(config, sandbox=sandbox)
        try:
            if loop_file:
                loop = load_loop(loop_file)
                if max_iterations is not None:
                    loop.max_iterations = max_iterations
                if check is not None:
                    loop.success_check = check
                if streak is not None:
                    loop.success_streak = streak
            else:
                loop = loop_from_skill(
                    base_agent.context_source, skill,
                    max_iterations=max_iterations or 5,
                    success_check=check,
                    success_streak=streak or 1,
                    cwd=config.working_dir,
                )
                if loop is None:
                    output_fn(f"[skill not found: {skill}]")
                    return 1

            def agent_factory() -> Agent:
                # Each call gets its own context manager; provider, registry
                # and context source are shared with the base agent.
                context = ContextManager(
                    budget=base_agent.provider.context_window - config.output_reserve,
                    keep_recent_turns=config.keep_recent_turns,
                )
                return Agent(
                    base_agent.provider, base_agent.registry, context,
                    AutoApprover(config.approval_policy), NullTelemetry(), config,
                    context_source=base_agent.context_source,
                )

            output_fn(f"[loop: {loop.name} — max {loop.max_iterations} iterations]")
            runner = LoopRunner(agent_factory, sandbox=sandbox, output_fn=output_fn)
            result = runner.run(loop)
        finally:
            telemetry.close()
            for c in mcp_clients:
                c.close()
    finally:
        # Runs even if build_runtime itself failed, so the sandbox never leaks.
        sandbox.close()

    verdict = "SUCCEEDED" if result.succeeded else "stopped without success"
    output_fn(f"{loop.name}: {verdict} after {result.count} iteration(s).")
    return 0 if result.succeeded else 1
1118
+
1119
+
1120
def _read_prompt_file(path: str) -> str:
    """Return the UTF-8 text of the prompt file at ``path``.

    Raises:
        SystemExit: if the file cannot be read or its bytes are not valid
            UTF-8. The exit message carries the underlying error so the CLI
            prints one line instead of a traceback.
    """
    try:
        return Path(path).read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError, so it has to be
        # named explicitly to get the same friendly exit as a missing file.
        raise SystemExit(f"could not read prompt file: {exc}") from exc
1125
+
1126
+
1127
def _active_profile(config: Config, args: argparse.Namespace) -> Profile | None:
    """Load the profile selected for this invocation, if there is one.

    The ``--profile`` argument wins over ``config.profile``. When
    ``config.profile_dir`` is set it is passed as the only search directory;
    otherwise ``load_profile`` is called with ``search_dirs=None``. Returns
    ``None`` when no profile name is configured.
    """
    chosen = args.profile or config.profile
    if chosen:
        if config.profile_dir:
            return load_profile(chosen, search_dirs=[Path(config.profile_dir)])
        return load_profile(chosen, search_dirs=None)
    return None
1134
+
1135
+
1136
def main(argv: list[str] | None = None) -> int:
    """Parse the command line and run the selected agentkernel command.

    ``argv`` is the argument list without the program name; ``None`` lets
    argparse read ``sys.argv``. When no sub-command is given the REPL is
    started. The return value is the process exit status (0 for success).
    """
    parser = argparse.ArgumentParser(prog="agentkernel", description="Agent kernel CLI")
    parser.add_argument(
        "--config",
        default=None,
        help="explicit TOML config file (skips global/project discovery)",
    )
    parser.add_argument(
        "-C",
        "--cwd",
        default=None,
        help="run as if launched from this directory (sets the project root)",
    )
    parser.add_argument(
        "--verbose-trace",
        action="store_true",
        help="log raw tool arguments to the trace (local debugging only)",
    )
    parser.add_argument(
        "--no-progress",
        action="store_true",
        help="disable per-turn progress lines in run/repl modes",
    )
    parser.add_argument(
        "--no-stream",
        action="store_true",
        help="disable live token streaming (print the answer when complete)",
    )
    parser.add_argument(
        "--profile",
        help="active profile name (overrides config.profile)",
    )
    parser.add_argument(
        "--model",
        help="model override for this session (overrides config.model and profile.model_override)",
    )
    parser.add_argument(
        "--memory",
        choices=("file", "memory"),
        help="enable a built-in memory store (overrides config.memory_store)",
    )
    parser.add_argument(
        "--resume",
        metavar="SESSION_ID",
        help="resume a saved session by id (run/repl); requires a memory store",
    )
    parser.add_argument(
        "--skill",
        action="append",
        default=[],
        help="activate a skill for this session (repeatable)",
    )
    subparsers = parser.add_subparsers(dest="command")
    subparsers.add_parser("repl", help="interactive REPL")
    subparsers.add_parser("tui", help="interactive curses-based terminal UI")
    run_parser = subparsers.add_parser("run", help="single non-interactive run")
    run_parser.add_argument("prompt", nargs="?", help="text prompt")
    run_parser.add_argument("--file", help="path to a file containing the prompt")
    run_parser.add_argument(
        "--background",
        action="store_true",
        help="run detached in the background; output goes to a file",
    )
    improve_parser = subparsers.add_parser(
        "improve", help="reflect on a session trace and write an improvement note"
    )
    improve_parser.add_argument(
        "--trace", help="trace file to analyze (default: latest in log_dir)"
    )
    eval_parser = subparsers.add_parser(
        "eval", help="run an eval suite and score the answers with a judge model"
    )
    eval_parser.add_argument("--suite", required=True, help="path to a TOML eval suite")
    eval_parser.add_argument(
        "--judge-model", help="model to score answers (default: config.judge_model)"
    )
    eval_parser.add_argument(
        "--output", "-o", help="write a JSON evaluation report to this path"
    )
    eval_parser.add_argument(
        "--case",
        action="append",
        default=[],
        help="run only matching case names/globs (repeatable)",
    )
    loop_parser = subparsers.add_parser(
        "loop", help="run a repeatable workflow loop with a stopping condition"
    )
    loop_parser.add_argument("--file", help="path to a loop TOML file")
    # The top-level parser already owns dest ``skill`` (a repeatable list).
    # argparse copies a sub-parser's namespace over the parent's, so sharing
    # that dest would replace the list with this option's str/None value and
    # break the ``config.skills + args.skill`` merge further down.
    loop_parser.add_argument(
        "--skill", dest="loop_skill", help="use a skill's body as the loop prompt"
    )
    loop_parser.add_argument("--max-iterations", type=int, help="iteration cap")
    loop_parser.add_argument("--check", help="success shell command (exit 0 = success)")
    loop_parser.add_argument(
        "--streak", type=int, help="consecutive successes required to stop"
    )
    insights_parser = subparsers.add_parser(
        "insights", help="aggregate session traces into a usage/cost report"
    )
    insights_parser.add_argument(
        "--days", type=int, help="only include records from the last N days"
    )
    subparsers.add_parser("doctor", help="check config, dependencies, and credentials")
    memory_parser = subparsers.add_parser(
        "memory", help="curate long-term memory (extract facts, consolidate)"
    )
    memory_parser.add_argument(
        "action", choices=("extract", "consolidate"), help="what to do"
    )
    memory_parser.add_argument(
        "--session", help="session id to extract from (default: the only session)"
    )
    sessions_parser = subparsers.add_parser(
        "sessions", help="list, show, or delete saved sessions"
    )
    sessions_parser.add_argument(
        "action", choices=("list", "show", "delete"), help="what to do"
    )
    sessions_parser.add_argument("session_id", nargs="?", help="session id (show/delete)")
    cron_parser = subparsers.add_parser(
        "cron", help="manage scheduled jobs (list/add/remove/run/tick)"
    )
    cron_parser.add_argument(
        "action", choices=("list", "add", "remove", "run", "tick")
    )
    cron_parser.add_argument(
        "rest", nargs="*",
        help="add: <schedule> <prompt...>; remove/run: <job_id>",
    )
    kanban_parser = subparsers.add_parser(
        "kanban",
        help="manage the shared work board (list/add/show/complete/block/remove)",
    )
    kanban_parser.add_argument(
        "action", choices=("list", "add", "show", "complete", "block", "remove")
    )
    kanban_parser.add_argument(
        "rest", nargs="*",
        help="add: <title...>; show/complete/remove: <task_id>; "
        "block: <task_id> [reason...]",
    )
    init_parser = subparsers.add_parser(
        "init", help="scaffold a starter agentkernel.toml (project or --global)"
    )
    init_parser.add_argument(
        "--global", dest="global_config", action="store_true",
        help="write the user-global ~/.agentkernel/config.toml instead of a project file",
    )
    init_parser.add_argument(
        "--force", action="store_true", help="overwrite if the config already exists"
    )
    new_parser = subparsers.add_parser(
        "new", help="scaffold a skill, profile, loop, or eval suite from a template"
    )
    new_parser.add_argument(
        "kind", choices=("skill", "profile", "loop", "eval"), help="what to create"
    )
    new_parser.add_argument("name", help="name for the new asset (kebab-case)")
    new_parser.add_argument(
        "--force", action="store_true", help="overwrite if the target already exists"
    )

    args = parser.parse_args(argv)
    command = getattr(args, "command", None) or "repl"

    if command == "run" and not args.prompt and not args.file:
        run_parser.error("the following arguments are required: prompt or --file")

    # `init` and `new` scaffold files; they need no provider/config/runtime.
    if command == "init":
        return run_init(
            target_dir=args.cwd or ".",
            global_config=getattr(args, "global_config", False),
            force=args.force,
        )
    if command == "new":
        return run_new(args.kind, args.name, force=args.force)

    config, project_config_path = resolve_config(args.config, cwd=args.cwd or ".")
    # The concrete config path to hand to subprocesses / MCP discovery.
    effective_config_path = args.config or (
        str(project_config_path) if project_config_path else None
    )
    if args.skill:
        # Append CLI skills to the configured ones, dropping duplicates while
        # keeping first-seen order.
        config.skills = list(dict.fromkeys(config.skills + args.skill))
    if args.model:
        config.model = args.model

    # `insights` and `doctor` read config but need no provider/runtime (§18.7).
    if command == "insights":
        from agentkernel.insights import aggregate_traces, format_insights

        days = getattr(args, "days", None)
        print(format_insights(aggregate_traces(config.log_dir, days=days), days=days))
        return 0
    if command == "doctor":
        from agentkernel.doctor import format_checks, has_failures, run_checks

        checks = run_checks(config)
        print(format_checks(checks))
        return 1 if has_failures(checks) else 0
    if command == "sessions":
        return run_sessions(config, args.action, getattr(args, "session_id", None))
    if command == "memory":
        return run_memory(config, args.action, session=getattr(args, "session", None))
    if command == "cron":
        return run_cron(config, args.action, args.rest)
    if command == "kanban":
        return run_kanban(config, args.action, args.rest)
    if command == "run" and getattr(args, "background", False):
        prompt = _read_prompt_file(args.file) if args.file else (args.prompt or "")
        return run_background(
            prompt,
            config_path=effective_config_path,
            cwd=config.working_dir,
            log_dir=config.log_dir,
        )

    # Load profile early so its model_override and rubric feed into config for
    # every command (run, repl, eval, loop, improve).
    active_profile = _active_profile(config, args)
    if active_profile is not None:
        if not args.model and active_profile.model_override:
            config.model = active_profile.model_override
        if active_profile.rubric and config.eval_rubric is None:
            config.eval_rubric = active_profile.rubric

    # 'improve' is a self-contained tool: it needs only a provider, not the full
    # runtime (no MCP servers, sandbox, or session trace file).
    if command == "improve":
        return run_improve(config, trace=getattr(args, "trace", None))

    if command == "eval":
        return run_eval(
            config,
            args.suite,
            judge_model=getattr(args, "judge_model", None),
            output_path=getattr(args, "output", None),
            case_filter=args.case or None,
        )

    if command == "tui":
        from agentkernel.tui import run_tui
        return run_tui(config)

    if command == "loop":
        return run_loop(
            config,
            loop_file=args.file,
            skill=args.loop_skill,
            max_iterations=getattr(args, "max_iterations", None),
            check=args.check,
            streak=args.streak,
        )

    # Merge MCP servers from the global config and the project config (or just
    # the explicit file), so servers can be declared once for all projects.
    if args.config:
        mcp_servers = load_mcp_servers(args.config)
    else:
        mcp_servers = []
        gpath = global_config_path(agent_home())
        if gpath.is_file():
            mcp_servers += load_mcp_servers(gpath)
        if project_config_path is not None:
            mcp_servers += load_mcp_servers(project_config_path)
    budget = BudgetGuard(
        max_cost_usd=config.max_cost_usd,
        max_input_tokens=config.max_input_tokens_per_run,
        model=config.model,
    )

    memory_kind = args.memory or config.memory_store
    memory_dir = config.memory_dir or str(Path(config.log_dir).parent / "memory")
    memory = make_memory_store(memory_kind, memory_dir)

    resume_id = getattr(args, "resume", None)
    if resume_id and memory is None:
        print(
            "[warning] --resume needs a memory store; enable one with --memory file "
            "or memory_store in config. Starting a fresh session.",
            file=sys.stderr,
        )
        resume_id = None

    sandbox = make_sandbox(
        config.sandbox,
        config.working_dir,
        image=config.sandbox_image,
        network=config.sandbox_network,
    )
    try:
        agent, telemetry, mcp_clients = build_runtime(
            config,
            mcp_servers=mcp_servers,
            verbose=args.verbose_trace,
            budget=budget,
            memory=memory,
            sandbox=sandbox,
            session_id=resume_id,
        )
    except (ProviderError, MCPError) as exc:
        # The runtime never came up, so the sandbox is the only thing to release.
        sandbox.close()
        print(f"[startup error] {exc}")
        return 1

    profile = active_profile

    # Live streaming writes model text to stdout as it arrives. When on, skip the
    # per-turn progress lines so they don't interleave with the streamed text.
    streaming = getattr(config, "stream", True) and not args.no_stream

    def _stdout_stream(text: str) -> None:
        sys.stdout.write(text)
        sys.stdout.flush()

    stream_fn = _stdout_stream if streaming else None

    if not args.no_progress and not streaming:
        telemetry = ProgressTelemetry(telemetry, output_fn=print)
        agent.telemetry = telemetry

    try:
        print(f"[session trace: {telemetry.path}]")
        if config.sandbox == "docker":
            print(
                f"[sandbox: docker image={config.sandbox_image} "
                f"network={config.sandbox_network}]"
            )
        if memory_kind:
            print(f"[memory: {memory_kind} @ {memory_dir}]")
        if mcp_clients:
            print(f"[connected MCP servers: {', '.join(s.name for s in mcp_servers)}]")

        if command == "run":
            prompt = args.prompt
            if args.file:
                prompt = _read_prompt_file(args.file)
            return run_once(
                agent, prompt or "", profile=profile, stream_fn=stream_fn, config=config
            )
        return repl(agent, config=config, stream_fn=stream_fn)
    finally:
        telemetry.close()
        for client in mcp_clients:
            client.close()
        sandbox.close()
1479
+
1480
+
1481
if __name__ == "__main__":
    # Run as a script: hand main()'s status code to the interpreter.
    sys.exit(main())