base_agentkit-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. agentkit/__init__.py +35 -0
  2. agentkit/agent/__init__.py +7 -0
  3. agentkit/agent/agent.py +368 -0
  4. agentkit/agent/budgets.py +48 -0
  5. agentkit/agent/report.py +166 -0
  6. agentkit/agent/tool_runtime.py +77 -0
  7. agentkit/cli/__init__.py +5 -0
  8. agentkit/cli/main.py +108 -0
  9. agentkit/config/__init__.py +23 -0
  10. agentkit/config/loader.py +108 -0
  11. agentkit/config/provider_defaults.py +96 -0
  12. agentkit/config/schema.py +148 -0
  13. agentkit/constants.py +21 -0
  14. agentkit/errors.py +58 -0
  15. agentkit/llm/__init__.py +53 -0
  16. agentkit/llm/base.py +36 -0
  17. agentkit/llm/factory.py +27 -0
  18. agentkit/llm/providers/__init__.py +15 -0
  19. agentkit/llm/providers/anthropic_provider.py +371 -0
  20. agentkit/llm/providers/gemini_provider.py +396 -0
  21. agentkit/llm/providers/openai_provider.py +881 -0
  22. agentkit/llm/providers/qwen_provider.py +34 -0
  23. agentkit/llm/providers/vllm_provider.py +47 -0
  24. agentkit/llm/types.py +215 -0
  25. agentkit/llm/usage.py +72 -0
  26. agentkit/py.typed +0 -0
  27. agentkit/runlog/__init__.py +15 -0
  28. agentkit/runlog/events.py +67 -0
  29. agentkit/runlog/jsonl.py +90 -0
  30. agentkit/runlog/recorder.py +94 -0
  31. agentkit/runlog/sinks.py +15 -0
  32. agentkit/tools/__init__.py +16 -0
  33. agentkit/tools/base.py +139 -0
  34. agentkit/tools/library/__init__.py +8 -0
  35. agentkit/tools/library/_fs_common.py +330 -0
  36. agentkit/tools/library/create_file.py +168 -0
  37. agentkit/tools/library/fs_tools.py +21 -0
  38. agentkit/tools/library/str_replace.py +241 -0
  39. agentkit/tools/library/view.py +372 -0
  40. agentkit/tools/library/word_count.py +138 -0
  41. agentkit/tools/loader.py +81 -0
  42. agentkit/tools/registry.py +284 -0
  43. agentkit/tools/types.py +98 -0
  44. agentkit/workspace/__init__.py +6 -0
  45. agentkit/workspace/fs.py +288 -0
  46. agentkit/workspace/layout.py +33 -0
  47. base_agentkit-0.1.0.dist-info/METADATA +142 -0
  48. base_agentkit-0.1.0.dist-info/RECORD +51 -0
  49. base_agentkit-0.1.0.dist-info/WHEEL +4 -0
  50. base_agentkit-0.1.0.dist-info/entry_points.txt +3 -0
  51. base_agentkit-0.1.0.dist-info/licenses/LICENSE +183 -0
agentkit/__init__.py ADDED
@@ -0,0 +1,35 @@
1
+ """Generic LLM agent framework."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ from .agent.agent import Agent
8
+ from .config.loader import load_config
9
+ from .config.schema import AgentkitConfig
10
+
11
+
12
def create_agent(
    config_or_path: AgentkitConfig | str | Path,
) -> Agent:
    """Construct an :class:`Agent` from config data or a config file.

    Args:
        config_or_path: A pre-built framework config object, or a filesystem
            path pointing at a YAML/JSON config file.

    Returns:
        Agent: A configured agent instance ready to execute tasks.

    Raises:
        agentkit.errors.ConfigError: If the config file is invalid or missing
            required fields.
    """
    # A config object is used as-is; anything else is treated as a path
    # and parsed through the loader.
    already_parsed = isinstance(config_or_path, AgentkitConfig)
    config = config_or_path if already_parsed else load_config(config_or_path)
    return Agent.from_config(config)


__all__ = ["Agent", "AgentkitConfig", "create_agent", "load_config"]
@@ -0,0 +1,7 @@
1
+ """Public agent runtime exports."""
2
+
3
+ from .agent import Agent
4
+ from .report import RunReport
5
+ from .tool_runtime import AgentToolRuntime
6
+
7
+ __all__ = ["Agent", "AgentToolRuntime", "RunReport"]
@@ -0,0 +1,368 @@
1
+ """Agent core loop: model inference <-> tool execution."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import time
6
+ from dataclasses import asdict
7
+ from typing import Any
8
+
9
+ from agentkit.agent.budgets import RuntimeBudget
10
+ from agentkit.agent.report import RunReport, RunReportProjector
11
+ from agentkit.agent.tool_runtime import AgentToolRuntime
12
+ from agentkit.config.schema import AgentkitConfig
13
+ from agentkit.errors import ProviderError
14
+ from agentkit.llm.base import BaseLLMProvider
15
+ from agentkit.llm.factory import build_provider
16
+ from agentkit.llm.types import (
17
+ ConversationItem,
18
+ ConversationState,
19
+ GenerationOptions,
20
+ MessageItem,
21
+ ToolCallItem,
22
+ ToolResultItem,
23
+ UnifiedLLMRequest,
24
+ Usage,
25
+ )
26
+ from agentkit.llm.usage import merge_usage, usage_to_payload
27
+ from agentkit.runlog import JsonlRunLogSink
28
+ from agentkit.tools.loader import load_tools_from_library
29
+ from agentkit.tools.registry import ToolRegistry
30
+ from agentkit.runlog.recorder import RunRecorder
31
+ from agentkit.workspace.fs import WorkspaceFS
32
+ from agentkit.workspace.layout import init_workspace_layout
33
+
34
+
35
+ class Agent:
36
+ """Coordinate model calls, tool execution, and event-driven projections.
37
+
38
+ The Agent orchestrates the core loop:
39
+
40
+ User Task
41
+
42
+ Model Inference
43
+
44
+ Tool Calls (optional)
45
+
46
+ Tool Execution
47
+
48
+ Model Continues
49
+
50
+ Final Result
51
+
52
+ All events are recorded and projected into a RunReport and optional run logs.
53
+ """
54
+
55
    def __init__(
        self,
        *,
        config: AgentkitConfig,
        fs: WorkspaceFS,
        provider: BaseLLMProvider,
        tool_runtime: AgentToolRuntime,
        runlog_sink: JsonlRunLogSink,
    ) -> None:
        """Bind pre-built collaborators onto the agent instance.

        Prefer :meth:`from_config`, which constructs all of these pieces
        from a validated :class:`AgentkitConfig`.

        Args:
            config: Validated framework configuration.
            fs: Workspace filesystem helper rooted at the run workspace.
            provider: LLM provider used for every model turn.
            tool_runtime: Tool executor with allowlist filtering.
            runlog_sink: JSONL sink that persists run events.
        """
        self.config = config
        self.fs = fs
        self.provider = provider
        self.tool_runtime = tool_runtime
        self.runlog_sink = runlog_sink
69
+
70
+ @classmethod
71
+ def from_config(cls, config: AgentkitConfig) -> "Agent":
72
+ """Build an Agent from validated configuration."""
73
+
74
+ # Initialize workspace directory layout
75
+ workspace_root = init_workspace_layout(config.workspace.root)
76
+ fs = WorkspaceFS(workspace_root)
77
+
78
+ # Build the configured LLM provider
79
+ provider = build_provider(config.provider)
80
+
81
+ # Load tools from the workspace library and register them
82
+ registry = ToolRegistry()
83
+ registry.register_many(load_tools_from_library(fs))
84
+
85
+ # Create tool runtime with allowlist filtering
86
+ tool_runtime = AgentToolRuntime(registry, config.tools.allowed)
87
+
88
+ # Initialize run logging
89
+ runlog_sink = JsonlRunLogSink(fs, config.runlog)
90
+
91
+ return cls(
92
+ config=config,
93
+ fs=fs,
94
+ provider=provider,
95
+ tool_runtime=tool_runtime,
96
+ runlog_sink=runlog_sink,
97
+ )
98
+
99
    def run(self, task: str) -> RunReport:
        """Execute the full model-tool loop for a single task.

        The run proceeds as:

        1. Initialize runtime state
        2. Call model
        3. Execute tools if requested
        4. Feed tool results back to model
        5. Repeat until model completes
        6. Produce final RunReport

        Args:
            task: Task text sent to the model as the initial user message.

        Returns:
            RunReport: Projection of every event recorded during this run.

        Raises:
            ProviderError: If the provider reports failure, requests tools
                without supplying calls, pauses, or returns an unknown status.
            RuntimeError: If the projected report's run id does not match the
                recorder's run id.

        Note:
            Budget overruns surface as whatever ``RuntimeBudget.ensure_can_continue``
            raises; any exception escaping the loop is recorded as a failed
            run before being re-raised.
        """

        # Project events into both a final report and the optional run log.
        report_projector = RunReportProjector()
        recorder = RunRecorder(sinks=[report_projector, self.runlog_sink])

        tool_specs = self.tool_runtime.schemas()  # tool schemas exposed to the LLM
        instructions = self.config.agent.system_prompt  # system prompt
        options = GenerationOptions(
            temperature=self.config.provider.temperature,
            reasoning_effort=self.config.provider.reasoning_effort,
            thinking_enabled=self.config.provider.enable_thinking,
        )  # generation parameters

        # Start run recording; the returned id later locates the run-log file
        # and sanity-checks the projected report.
        run_id = recorder.start_run(
            task=task,
            context={
                "provider": self.config.provider.kind,
                "model": self.config.provider.model,
                "conversation_mode": self.config.provider.conversation_mode,
                "instructions": instructions,
                "tools": [asdict(tool) for tool in tool_specs],
                "options": asdict(options),
            },
        )

        # Runtime guardrails (step count and wall-clock budget)
        budget = RuntimeBudget(
            max_steps=self.config.agent.budget.max_steps,
            time_budget_s=self.config.agent.budget.time_budget_s,
        )

        # Conversation state holds full chat history
        state = ConversationState(mode=self.config.provider.conversation_mode)

        # Inputs for the next model turn
        next_inputs: list[ConversationItem] = [MessageItem(role="user", text=task)]

        step = 0
        run_closed = False  # flipped once recorder.end_run has fired
        model_step_count = 0  # completed model turns
        tool_call_count = 0  # executed tool calls
        aggregate_usage = Usage()  # token usage accumulated across all turns

        # Track last response for better error reporting
        last_response_reason: str | None = None
        last_response_output = ""

        try:
            while True:
                # Ensure execution stays within configured limits
                budget.ensure_can_continue(step)

                # Build the unified LLM request
                req = UnifiedLLMRequest(
                    provider=self.config.provider.kind,
                    model=self.config.provider.model,
                    state=state,
                    inputs=list(next_inputs),
                    instructions=instructions,
                    tools=tool_specs,
                    options=options,
                )

                # Call LLM provider, timing the round trip for the run log
                call_start = time.perf_counter()
                response = self.provider.generate(req)
                call_ms = (time.perf_counter() - call_start) * 1000

                last_response_reason = response.reason
                last_response_output = response.output_text
                merge_usage(aggregate_usage, response.usage)

                # Update conversation history
                state.history.extend(req.inputs)
                state.history.extend(response.output_items)

                # Apply provider state patches (cursor updates etc.)
                if response.state_patch.new_provider_cursor is not None:
                    state.provider_cursor = response.state_patch.new_provider_cursor

                if response.state_patch.provider_meta_patch:
                    state.provider_meta.update(response.state_patch.provider_meta_patch)

                # Record model response event (full request/response detail
                # goes to the run log via _serialize_item)
                recorder.emit(
                    "model_responded",
                    step=step,
                    payload={
                        "status": response.status,
                        "reason": response.reason,
                        "output_text": response.output_text,
                        "duration_ms": call_ms,
                        "requested_tools": [
                            self._serialize_item(call) for call in response.tool_calls
                        ],
                        "request": {
                            "state_cursor": req.state.provider_cursor,
                            "inputs": [self._serialize_item(i) for i in req.inputs],
                        },
                        "response": {
                            "response_id": response.response_id,
                            "output_items": [
                                self._serialize_item(i) for i in response.output_items
                            ],
                            "usage": asdict(response.usage),
                            "state_patch": asdict(response.state_patch),
                            "raw_response": response.raw_response,
                        },
                    },
                )

                model_step_count += 1
                next_inputs = []

                # Branch based on provider response status
                match response.status:
                    case "requires_tool":
                        # Model requested tool execution

                        if not response.tool_calls:
                            raise ProviderError(
                                "Model turn requested tool execution but returned no tool calls."
                            )

                        # Execute tools and collect results
                        for call in response.tool_calls:
                            outcome = self.tool_runtime.execute(call)

                            # Tool result becomes next model input
                            next_inputs.append(
                                self.tool_runtime.build_result_item(outcome)
                            )

                            # Record tool execution
                            recorder.emit(
                                "tool_executed",
                                step=step,
                                payload=outcome.to_event_payload(),
                            )

                            tool_call_count += 1

                        step += 1
                        continue

                    case "completed" | "blocked" | "incomplete":
                        # Run finished (successfully or otherwise)

                        if response.reason == "pause":
                            raise ProviderError(
                                "Model turn paused with reason=pause. Automatic continuation is not implemented."
                            )

                        recorder.end_run(
                            status=response.status,
                            payload={
                                "reason": response.reason,
                                "step_count": model_step_count,
                                "tool_call_count": tool_call_count,
                                "final_output": response.output_text,
                                "usage": usage_to_payload(aggregate_usage),
                            },
                        )

                        run_closed = True
                        break

                    case "failed":
                        # Provider explicitly reported failure
                        raise ProviderError(
                            f"Model turn failed with reason={response.reason}."
                        )

                    case _:
                        # Unknown response status
                        raise ProviderError(
                            f"Model turn returned unsupported status={response.status!r}."
                        )

        except Exception as exc:
            # Ensure failed runs are properly recorded before re-raising
            if not run_closed:
                payload: dict[str, Any] = {
                    "step_count": model_step_count,
                    "tool_call_count": tool_call_count,
                    "error_type": type(exc).__name__,
                    "error_message": str(exc),
                }

                # Include the last observed model state when available, so the
                # failure record still shows what the model said last.
                if last_response_reason is not None:
                    payload["reason"] = last_response_reason

                if last_response_output:
                    payload["final_output"] = last_response_output

                payload["usage"] = usage_to_payload(aggregate_usage)

                recorder.end_run(
                    status="failed",
                    payload=payload,
                )

            raise

        # Build final report from recorded events
        report = report_projector.build(
            runlog_path=(
                str(self.runlog_sink.runlog_path_for_run(run_id))
                if self.runlog_sink.enabled
                else None
            )
        )

        # Safety check to ensure run IDs match
        if report.run_id != run_id:
            raise RuntimeError(
                "Run report projection mismatch: run_id changed during recording."
            )

        return report
332
+
333
+ def _serialize_item(self, item: ConversationItem) -> dict[str, Any]:
334
+ """Convert a conversation item into a run-log friendly structure.
335
+
336
+ This avoids leaking internal objects into logs and ensures JSON-safe output.
337
+ """
338
+
339
+ if isinstance(item, MessageItem):
340
+ return {"kind": "message", "role": item.role, "text": item.text}
341
+
342
+ if isinstance(item, ToolCallItem):
343
+ return {
344
+ "kind": "tool_call",
345
+ "call_id": item.call_id,
346
+ "name": item.name,
347
+ "arguments": item.arguments,
348
+ "raw_arguments": item.raw_arguments,
349
+ }
350
+
351
+ if isinstance(item, ToolResultItem):
352
+ return {
353
+ "kind": "tool_result",
354
+ "call_id": item.call_id,
355
+ "tool_name": item.tool_name,
356
+ "payload": item.payload,
357
+ "output_text": item.output_text,
358
+ "is_error": item.is_error,
359
+ }
360
+
361
+ # Default: reasoning / thinking items
362
+ return {
363
+ "kind": "reasoning",
364
+ "text": item.text,
365
+ "summary": item.summary,
366
+ "raw_item": item.raw_item,
367
+ "replay_hint": item.replay_hint,
368
+ }
@@ -0,0 +1,48 @@
1
+ """Runtime budget guards."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import time
6
+
7
+ from agentkit.errors import BudgetExceededError
8
+
9
+
10
class RuntimeBudget:
    """Track and enforce runtime limits for a single agent run."""

    def __init__(self, *, max_steps: int, time_budget_s: int) -> None:
        """Record budget thresholds and note the starting time.

        Args:
            max_steps: Maximum number of model/tool loop iterations allowed.
            time_budget_s: Maximum wall-clock runtime in seconds.
        """
        self.max_steps = max_steps
        self.time_budget_s = time_budget_s
        # Monotonic clock: elapsed time is immune to wall-clock adjustments.
        self.started_monotonic = time.monotonic()

    def ensure_can_continue(self, step_index: int) -> None:
        """Raise if the run went over its step or time budget.

        Args:
            step_index: Zero-based index of the next step to execute.

        Raises:
            agentkit.errors.BudgetExceededError: When the step count or the
                elapsed wall-clock time exceeds the configured limit.
        """
        # Step budget is checked first, so a run that is over on both
        # limits reports the step overrun.
        over_steps = step_index >= self.max_steps
        if over_steps:
            raise BudgetExceededError(
                f"Step budget exceeded: step={step_index}, max_steps={self.max_steps}"
            )

        elapsed = time.monotonic() - self.started_monotonic
        if elapsed > self.time_budget_s:
            raise BudgetExceededError(
                f"Time budget exceeded: elapsed={elapsed:.1f}s, budget={self.time_budget_s}s"
            )
@@ -0,0 +1,166 @@
1
+ """Run report projection from canonical run events."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import asdict, dataclass, field
6
+ from typing import Any
7
+
8
+ from agentkit.llm.types import Usage
9
+ from agentkit.llm.usage import usage_from_payload
10
+ from agentkit.runlog.events import RunEvent
11
+ from agentkit.runlog.sinks import RunEventSink
12
+
13
+
14
@dataclass(slots=True)
class RunStep:
    """One model turn summary in the returned run report."""

    # Loop step index at which this model turn occurred.
    step: int
    # Assistant text output for this turn (may be empty).
    assistant_text: str
    # Names of tools the model requested during this turn.
    tool_calls: list[str] = field(default_factory=list)
    # Timestamp string copied from the originating "model_responded" event.
    ts: str = ""
22
+
23
+
24
@dataclass(slots=True)
class RunToolCall:
    """One tool execution record in the returned run report."""

    # Loop step during which the tool ran.
    step: int
    # Call identifier taken from the event payload (presumably correlates
    # request and result — confirm against the tool runtime).
    call_id: str
    # Tool name as requested by the model.
    name: str
    # Parsed tool arguments ({} when the recorded payload had none).
    arguments: dict[str, Any]
    # True when the tool reported an error outcome.
    is_error: bool
    # Raw tool output from the event payload, if any.
    output: Any = None
    # Error description when is_error is set; otherwise None.
    error: str | None = None
    # Payload recorded under "model_payload" — TODO confirm semantics.
    model_payload: Any = None
    # Tool execution wall time in milliseconds, when recorded.
    duration_ms: float | None = None
    # Timestamp string copied from the originating "tool_executed" event.
    ts: str = ""
38
+
39
+
40
@dataclass(slots=True)
class RunReport:
    """Structured result object returned by :meth:`agentkit.agent.Agent.run`."""

    # Original task text passed to the run.
    task: str
    # Timestamp of the "run_started" event.
    started_at: str
    # Identifier assigned by the run recorder.
    run_id: str
    # Filesystem path of the JSONL run log, when logging is enabled.
    runlog_path: str | None = None
    # Terminal status; stays "failed" until a run_finished event says otherwise.
    status: str = "failed"
    # Convenience flag: True iff status == "completed".
    completed: bool = False
    # Final assistant output text, if the run produced one.
    final_output: str = ""
    # Provider-reported reason accompanying the terminal status, if any.
    reason: str | None = None
    # Timestamp of the "run_finished" event; None if the run never closed.
    finished_at: str | None = None
    # Aggregated token usage across all model turns.
    usage: Usage = field(default_factory=Usage)
    # Per-model-turn summaries, in order.
    steps: list[RunStep] = field(default_factory=list)
    # Per-tool-execution records, in order.
    tool_calls: list[RunToolCall] = field(default_factory=list)
    # Artifact paths produced by the run — NOTE(review): not populated by the
    # projector visible in this file; confirm who fills it.
    artifacts: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Convert the report into plain data for CLI or API serialization."""
        return asdict(self)
61
+
62
+
63
class RunReportProjector(RunEventSink):
    """Project canonical run events into a :class:`RunReport`.

    The projector is a run-event sink: feed it events via :meth:`consume`
    and call :meth:`build` once the run is over to obtain the final report.
    """

    def __init__(self) -> None:
        """Initialize empty projection state for a single run."""
        self._task = ""
        self._started_at = ""
        self._run_id = ""
        self._status = "failed"  # pessimistic default until run_finished arrives
        self._completed = False
        self._final_output = ""
        self._reason: str | None = None
        self._finished_at: str | None = None
        self._usage = Usage()
        self._steps: list[RunStep] = []
        self._tool_calls: list[RunToolCall] = []

    def consume(self, event: RunEvent) -> None:
        """Update the projection incrementally from one canonical run event.

        Args:
            event: Canonical event emitted by the run recorder. Event kinds
                other than the four handled below are ignored.
        """
        if event.kind == "run_started":
            self._task = str(event.payload.get("task", ""))
            self._started_at = event.ts
            self._run_id = event.run_id
            return

        if event.kind == "model_responded":
            # Defensive: the payload is log-derived data, so only trust it
            # when it has the expected list-of-dicts shape.
            requested_tools = event.payload.get("requested_tools", [])
            tool_call_names: list[str] = []
            if isinstance(requested_tools, list):
                tool_call_names = [
                    str(item["name"])
                    for item in requested_tools
                    if isinstance(item, dict) and item.get("name") is not None
                ]
            self._steps.append(
                RunStep(
                    step=event.step or 0,
                    assistant_text=str(event.payload.get("output_text", "")),
                    # Freshly built above, so no copy or isinstance re-check
                    # is needed (the original re-checked a value that was
                    # always a list — dead branch removed).
                    tool_calls=tool_call_names,
                    ts=event.ts,
                )
            )
            return

        if event.kind == "tool_executed":
            arguments = event.payload.get("arguments")
            self._tool_calls.append(
                RunToolCall(
                    step=event.step or 0,
                    call_id=str(event.payload.get("call_id", "")),
                    name=str(event.payload.get("name", "")),
                    arguments=dict(arguments) if isinstance(arguments, dict) else {},
                    is_error=bool(event.payload.get("is_error", False)),
                    output=event.payload.get("output"),
                    error=event.payload.get("error"),
                    model_payload=event.payload.get("model_payload"),
                    duration_ms=event.payload.get("duration_ms"),
                    ts=event.ts,
                )
            )
            return

        if event.kind == "run_finished":
            self._finished_at = event.ts
            self._status = str(event.payload.get("status", self._status))
            self._completed = self._status == "completed"
            # Only overwrite the final output when the event carries one, so
            # an earlier value survives a payload that omits the key.
            if "final_output" in event.payload:
                self._final_output = str(event.payload.get("final_output", ""))
            reason = event.payload.get("reason")
            if reason is not None:
                self._reason = str(reason)
            usage = event.payload.get("usage")
            if isinstance(usage, dict):
                self._usage = usage_from_payload(usage)

    def build(self, *, runlog_path: str | None = None) -> RunReport:
        """Materialize the current projection into a report object.

        Args:
            runlog_path: Optional path of the JSONL run log for this run.

        Returns:
            RunReport: Snapshot of everything projected so far (lists are
            copied so later events do not mutate the report).

        Raises:
            RuntimeError: If no ``run_started`` event was consumed.
        """
        if not self._run_id or not self._started_at:
            raise RuntimeError("RunReportProjector has not received run_started.")

        return RunReport(
            task=self._task,
            started_at=self._started_at,
            run_id=self._run_id,
            runlog_path=runlog_path,
            status=self._status,
            completed=self._completed,
            final_output=self._final_output,
            reason=self._reason,
            finished_at=self._finished_at,
            usage=self._usage,
            steps=list(self._steps),
            tool_calls=list(self._tool_calls),
        )

    @property
    def step_count(self) -> int:
        """Return the number of model turns observed so far."""
        return len(self._steps)

    @property
    def tool_call_count(self) -> int:
        """Return the number of tool executions observed so far."""
        return len(self._tool_calls)