base-agentkit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentkit/__init__.py +35 -0
- agentkit/agent/__init__.py +7 -0
- agentkit/agent/agent.py +368 -0
- agentkit/agent/budgets.py +48 -0
- agentkit/agent/report.py +166 -0
- agentkit/agent/tool_runtime.py +77 -0
- agentkit/cli/__init__.py +5 -0
- agentkit/cli/main.py +108 -0
- agentkit/config/__init__.py +23 -0
- agentkit/config/loader.py +108 -0
- agentkit/config/provider_defaults.py +96 -0
- agentkit/config/schema.py +148 -0
- agentkit/constants.py +21 -0
- agentkit/errors.py +58 -0
- agentkit/llm/__init__.py +53 -0
- agentkit/llm/base.py +36 -0
- agentkit/llm/factory.py +27 -0
- agentkit/llm/providers/__init__.py +15 -0
- agentkit/llm/providers/anthropic_provider.py +371 -0
- agentkit/llm/providers/gemini_provider.py +396 -0
- agentkit/llm/providers/openai_provider.py +881 -0
- agentkit/llm/providers/qwen_provider.py +34 -0
- agentkit/llm/providers/vllm_provider.py +47 -0
- agentkit/llm/types.py +215 -0
- agentkit/llm/usage.py +72 -0
- agentkit/py.typed +0 -0
- agentkit/runlog/__init__.py +15 -0
- agentkit/runlog/events.py +67 -0
- agentkit/runlog/jsonl.py +90 -0
- agentkit/runlog/recorder.py +94 -0
- agentkit/runlog/sinks.py +15 -0
- agentkit/tools/__init__.py +16 -0
- agentkit/tools/base.py +139 -0
- agentkit/tools/library/__init__.py +8 -0
- agentkit/tools/library/_fs_common.py +330 -0
- agentkit/tools/library/create_file.py +168 -0
- agentkit/tools/library/fs_tools.py +21 -0
- agentkit/tools/library/str_replace.py +241 -0
- agentkit/tools/library/view.py +372 -0
- agentkit/tools/library/word_count.py +138 -0
- agentkit/tools/loader.py +81 -0
- agentkit/tools/registry.py +284 -0
- agentkit/tools/types.py +98 -0
- agentkit/workspace/__init__.py +6 -0
- agentkit/workspace/fs.py +288 -0
- agentkit/workspace/layout.py +33 -0
- base_agentkit-0.1.0.dist-info/METADATA +142 -0
- base_agentkit-0.1.0.dist-info/RECORD +51 -0
- base_agentkit-0.1.0.dist-info/WHEEL +4 -0
- base_agentkit-0.1.0.dist-info/entry_points.txt +3 -0
- base_agentkit-0.1.0.dist-info/licenses/LICENSE +183 -0
agentkit/__init__.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Generic LLM agent framework."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from .agent.agent import Agent
|
|
8
|
+
from .config.loader import load_config
|
|
9
|
+
from .config.schema import AgentkitConfig
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def create_agent(
    config_or_path: AgentkitConfig | str | Path,
) -> Agent:
    """Build an :class:`Agent` from config data or a config file.

    Args:
        config_or_path: Either a fully instantiated framework config object or a
            filesystem path to a YAML/JSON config file.

    Returns:
        Agent: A configured agent instance ready to execute tasks.

    Raises:
        agentkit.errors.ConfigError: If the config file is invalid or missing required
            fields.
    """
    # An already-validated config object is used as-is; anything else is
    # treated as a path and parsed by load_config.
    config = (
        config_or_path
        if isinstance(config_or_path, AgentkitConfig)
        else load_config(config_or_path)
    )
    return Agent.from_config(config)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
__all__ = ["Agent", "AgentkitConfig", "create_agent", "load_config"]
|
agentkit/agent/agent.py
ADDED
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
"""Agent core loop: model inference <-> tool execution."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
from dataclasses import asdict
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from agentkit.agent.budgets import RuntimeBudget
|
|
10
|
+
from agentkit.agent.report import RunReport, RunReportProjector
|
|
11
|
+
from agentkit.agent.tool_runtime import AgentToolRuntime
|
|
12
|
+
from agentkit.config.schema import AgentkitConfig
|
|
13
|
+
from agentkit.errors import ProviderError
|
|
14
|
+
from agentkit.llm.base import BaseLLMProvider
|
|
15
|
+
from agentkit.llm.factory import build_provider
|
|
16
|
+
from agentkit.llm.types import (
|
|
17
|
+
ConversationItem,
|
|
18
|
+
ConversationState,
|
|
19
|
+
GenerationOptions,
|
|
20
|
+
MessageItem,
|
|
21
|
+
ToolCallItem,
|
|
22
|
+
ToolResultItem,
|
|
23
|
+
UnifiedLLMRequest,
|
|
24
|
+
Usage,
|
|
25
|
+
)
|
|
26
|
+
from agentkit.llm.usage import merge_usage, usage_to_payload
|
|
27
|
+
from agentkit.runlog import JsonlRunLogSink
|
|
28
|
+
from agentkit.tools.loader import load_tools_from_library
|
|
29
|
+
from agentkit.tools.registry import ToolRegistry
|
|
30
|
+
from agentkit.runlog.recorder import RunRecorder
|
|
31
|
+
from agentkit.workspace.fs import WorkspaceFS
|
|
32
|
+
from agentkit.workspace.layout import init_workspace_layout
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class Agent:
    """Coordinate model calls, tool execution, and event-driven projections.

    The Agent orchestrates the core loop:

        User Task
          ↓
        Model Inference
          ↓
        Tool Calls (optional)
          ↓
        Tool Execution
          ↓
        Model Continues
          ↓
        Final Result

    All events are recorded and projected into a RunReport and optional run logs.
    """

    def __init__(
        self,
        *,
        config: AgentkitConfig,
        fs: WorkspaceFS,
        provider: BaseLLMProvider,
        tool_runtime: AgentToolRuntime,
        runlog_sink: JsonlRunLogSink,
    ) -> None:
        """Store pre-built collaborators; use :meth:`from_config` for wiring.

        Args:
            config: Validated framework configuration driving the run.
            fs: Workspace filesystem abstraction rooted at the run workspace.
            provider: LLM backend used for every model turn.
            tool_runtime: Registry-backed executor for model tool calls.
            runlog_sink: JSONL sink that persists run events (may be disabled).
        """
        self.config = config
        self.fs = fs
        self.provider = provider
        self.tool_runtime = tool_runtime
        self.runlog_sink = runlog_sink

    @classmethod
    def from_config(cls, config: AgentkitConfig) -> "Agent":
        """Build an Agent from validated configuration."""

        # Initialize workspace directory layout
        workspace_root = init_workspace_layout(config.workspace.root)
        fs = WorkspaceFS(workspace_root)

        # Build the configured LLM provider
        provider = build_provider(config.provider)

        # Load tools from the workspace library and register them
        registry = ToolRegistry()
        registry.register_many(load_tools_from_library(fs))

        # Create tool runtime with allowlist filtering
        tool_runtime = AgentToolRuntime(registry, config.tools.allowed)

        # Initialize run logging
        runlog_sink = JsonlRunLogSink(fs, config.runlog)

        return cls(
            config=config,
            fs=fs,
            provider=provider,
            tool_runtime=tool_runtime,
            runlog_sink=runlog_sink,
        )

    def run(self, task: str) -> RunReport:
        """Execute the full model-tool loop for a single task.

        The run proceeds as:

        1. Initialize runtime state
        2. Call model
        3. Execute tools if requested
        4. Feed tool results back to model
        5. Repeat until model completes
        6. Produce final RunReport

        Args:
            task: The user task text; it becomes the first user message.

        Returns:
            RunReport: Projection of all events emitted during the run.

        Raises:
            agentkit.errors.ProviderError: On empty tool-call lists, pause
                responses, explicit provider failure, or unknown status values.
            agentkit.errors.BudgetExceededError: When step or time budgets are
                exhausted (raised by the budget guard before each turn).
        """

        # Project events into both a final report and the optional run log.
        report_projector = RunReportProjector()
        recorder = RunRecorder(sinks=[report_projector, self.runlog_sink])

        tool_specs = self.tool_runtime.schemas()  # tool schemas exposed to the LLM
        instructions = self.config.agent.system_prompt  # system prompt
        options = GenerationOptions(
            temperature=self.config.provider.temperature,
            reasoning_effort=self.config.provider.reasoning_effort,
            thinking_enabled=self.config.provider.enable_thinking,
        )  # generation parameters

        # Start run recording; the context snapshot captures everything needed
        # to replay the run configuration from the log alone.
        run_id = recorder.start_run(
            task=task,
            context={
                "provider": self.config.provider.kind,
                "model": self.config.provider.model,
                "conversation_mode": self.config.provider.conversation_mode,
                "instructions": instructions,
                "tools": [asdict(tool) for tool in tool_specs],
                "options": asdict(options),
            },
        )

        # Runtime guardrails (wall clock starts ticking here)
        budget = RuntimeBudget(
            max_steps=self.config.agent.budget.max_steps,
            time_budget_s=self.config.agent.budget.time_budget_s,
        )

        # Conversation state holds full chat history
        state = ConversationState(mode=self.config.provider.conversation_mode)

        # Inputs for the next model turn
        next_inputs: list[ConversationItem] = [MessageItem(role="user", text=task)]

        # NOTE: `step` only advances on tool-requesting turns; `model_step_count`
        # counts every model response. They intentionally differ.
        step = 0
        run_closed = False
        model_step_count = 0
        tool_call_count = 0
        aggregate_usage = Usage()

        # Track last response for better error reporting
        last_response_reason: str | None = None
        last_response_output = ""

        try:
            while True:
                # Ensure execution stays within configured limits
                budget.ensure_can_continue(step)

                # Build the unified LLM request
                req = UnifiedLLMRequest(
                    provider=self.config.provider.kind,
                    model=self.config.provider.model,
                    state=state,
                    inputs=list(next_inputs),
                    instructions=instructions,
                    tools=tool_specs,
                    options=options,
                )

                # Call LLM provider (timed for the run log)
                call_start = time.perf_counter()
                response = self.provider.generate(req)
                call_ms = (time.perf_counter() - call_start) * 1000

                # Captured before any branch so the except-path end_run can
                # report the last thing the model actually said.
                last_response_reason = response.reason
                last_response_output = response.output_text
                merge_usage(aggregate_usage, response.usage)

                # Update conversation history
                state.history.extend(req.inputs)
                state.history.extend(response.output_items)

                # Apply provider state patches (cursor updates etc.)
                if response.state_patch.new_provider_cursor is not None:
                    state.provider_cursor = response.state_patch.new_provider_cursor

                if response.state_patch.provider_meta_patch:
                    state.provider_meta.update(response.state_patch.provider_meta_patch)

                # Record model response event, including the full request echo
                # so the turn is replayable from the log.
                recorder.emit(
                    "model_responded",
                    step=step,
                    payload={
                        "status": response.status,
                        "reason": response.reason,
                        "output_text": response.output_text,
                        "duration_ms": call_ms,
                        "requested_tools": [
                            self._serialize_item(call) for call in response.tool_calls
                        ],
                        "request": {
                            "state_cursor": req.state.provider_cursor,
                            "inputs": [self._serialize_item(i) for i in req.inputs],
                        },
                        "response": {
                            "response_id": response.response_id,
                            "output_items": [
                                self._serialize_item(i) for i in response.output_items
                            ],
                            "usage": asdict(response.usage),
                            "state_patch": asdict(response.state_patch),
                            "raw_response": response.raw_response,
                        },
                    },
                )

                model_step_count += 1
                next_inputs = []

                # Branch based on provider response status
                match response.status:
                    case "requires_tool":
                        # Model requested tool execution

                        if not response.tool_calls:
                            raise ProviderError(
                                "Model turn requested tool execution but returned no tool calls."
                            )

                        # Execute tools sequentially, in the order requested
                        for call in response.tool_calls:
                            outcome = self.tool_runtime.execute(call)

                            # Tool result becomes next model input
                            next_inputs.append(
                                self.tool_runtime.build_result_item(outcome)
                            )

                            # Record tool execution
                            recorder.emit(
                                "tool_executed",
                                step=step,
                                payload=outcome.to_event_payload(),
                            )

                            tool_call_count += 1

                        step += 1
                        continue

                    case "completed" | "blocked" | "incomplete":
                        # Run finished (successfully or otherwise)

                        if response.reason == "pause":
                            raise ProviderError(
                                "Model turn paused with reason=pause. Automatic continuation is not implemented."
                            )

                        recorder.end_run(
                            status=response.status,
                            payload={
                                "reason": response.reason,
                                "step_count": model_step_count,
                                "tool_call_count": tool_call_count,
                                "final_output": response.output_text,
                                "usage": usage_to_payload(aggregate_usage),
                            },
                        )

                        # Mark closed so the except-path doesn't end_run twice.
                        run_closed = True
                        break

                    case "failed":
                        # Provider explicitly reported failure
                        raise ProviderError(
                            f"Model turn failed with reason={response.reason}."
                        )

                    case _:
                        # Unknown response status
                        raise ProviderError(
                            f"Model turn returned unsupported status={response.status!r}."
                        )

        except Exception as exc:
            # Ensure failed runs are properly recorded before re-raising.
            if not run_closed:
                payload = {
                    "step_count": model_step_count,
                    "tool_call_count": tool_call_count,
                    "error_type": type(exc).__name__,
                    "error_message": str(exc),
                }

                # Only include fields we actually observed before the failure.
                if last_response_reason is not None:
                    payload["reason"] = last_response_reason

                if last_response_output:
                    payload["final_output"] = last_response_output

                payload["usage"] = usage_to_payload(aggregate_usage)

                recorder.end_run(
                    status="failed",
                    payload=payload,
                )

            raise

        # Build final report from recorded events
        report = report_projector.build(
            runlog_path=(
                str(self.runlog_sink.runlog_path_for_run(run_id))
                if self.runlog_sink.enabled
                else None
            )
        )

        # Safety check to ensure run IDs match
        if report.run_id != run_id:
            raise RuntimeError(
                "Run report projection mismatch: run_id changed during recording."
            )

        return report

    def _serialize_item(self, item: ConversationItem) -> dict[str, Any]:
        """Convert a conversation item into a run-log friendly structure.

        This avoids leaking internal objects into logs and ensures JSON-safe output.

        Args:
            item: Any conversation item (message, tool call, tool result, or
                reasoning/thinking item).

        Returns:
            dict[str, Any]: A plain dict tagged with a "kind" discriminator.
        """

        if isinstance(item, MessageItem):
            return {"kind": "message", "role": item.role, "text": item.text}

        if isinstance(item, ToolCallItem):
            return {
                "kind": "tool_call",
                "call_id": item.call_id,
                "name": item.name,
                "arguments": item.arguments,
                "raw_arguments": item.raw_arguments,
            }

        if isinstance(item, ToolResultItem):
            return {
                "kind": "tool_result",
                "call_id": item.call_id,
                "tool_name": item.tool_name,
                "payload": item.payload,
                "output_text": item.output_text,
                "is_error": item.is_error,
            }

        # Default: reasoning / thinking items
        # NOTE(review): assumes any non-message/call/result item exposes
        # text/summary/raw_item/replay_hint — confirm against llm.types.
        return {
            "kind": "reasoning",
            "text": item.text,
            "summary": item.summary,
            "raw_item": item.raw_item,
            "replay_hint": item.replay_hint,
        }
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Runtime budget guards."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
|
|
7
|
+
from agentkit.errors import BudgetExceededError
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class RuntimeBudget:
    """Track and enforce runtime limits for a single agent run."""

    def __init__(self, *, max_steps: int, time_budget_s: int) -> None:
        """Capture the configured limits and start the wall clock.

        Args:
            max_steps: Maximum number of model/tool loop iterations allowed.
            time_budget_s: Maximum wall-clock runtime in seconds.

        Returns:
            None
        """
        self.max_steps = max_steps
        self.time_budget_s = time_budget_s
        # Monotonic clock is immune to system time adjustments.
        self.started_monotonic = time.monotonic()

    def ensure_can_continue(self, step_index: int) -> None:
        """Raise if the run exceeded configured step or time budgets.

        Args:
            step_index: Zero-based index of the next step to execute.

        Returns:
            None

        Raises:
            agentkit.errors.BudgetExceededError: If the step count or elapsed time is
                above configured limits.
        """
        # Step guard first: it is cheap and deterministic.
        if not step_index < self.max_steps:
            raise BudgetExceededError(
                f"Step budget exceeded: step={step_index}, max_steps={self.max_steps}"
            )
        # Time guard: strictly-greater comparison, so hitting the budget
        # exactly is still allowed.
        elapsed = time.monotonic() - self.started_monotonic
        if elapsed > self.time_budget_s:
            raise BudgetExceededError(
                f"Time budget exceeded: elapsed={elapsed:.1f}s, budget={self.time_budget_s}s"
            )
|
agentkit/agent/report.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"""Run report projection from canonical run events."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import asdict, dataclass, field
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from agentkit.llm.types import Usage
|
|
9
|
+
from agentkit.llm.usage import usage_from_payload
|
|
10
|
+
from agentkit.runlog.events import RunEvent
|
|
11
|
+
from agentkit.runlog.sinks import RunEventSink
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(slots=True)
class RunStep:
    """One model turn summary in the returned run report."""

    # Zero-based loop index of the turn this step summarizes.
    step: int
    # Assistant-visible text produced during the turn (may be empty).
    assistant_text: str
    # Names of the tools the model requested this turn, in request order.
    tool_calls: list[str] = field(default_factory=list)
    # Timestamp string copied from the originating run event; "" if unset.
    ts: str = ""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(slots=True)
class RunToolCall:
    """One tool execution record in the returned run report."""

    # Zero-based loop index of the turn that requested this call.
    step: int
    # Provider-issued identifier correlating call and result.
    call_id: str
    # Tool name as exposed to the model.
    name: str
    # Parsed tool arguments; empty dict when unavailable or malformed.
    arguments: dict[str, Any]
    # True when the tool execution produced an error outcome.
    is_error: bool
    # Raw tool output; None when the tool produced nothing or failed.
    output: Any = None
    # Human-readable error description when is_error is True.
    error: str | None = None
    # Payload as it was handed back to the model (may differ from `output`).
    model_payload: Any = None
    # Wall-clock duration of the execution in milliseconds, if measured.
    duration_ms: float | None = None
    # Timestamp string copied from the originating run event; "" if unset.
    ts: str = ""
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass(slots=True)
class RunReport:
    """Structured result object returned by :meth:`agentkit.agent.Agent.run`."""

    # Original user task text.
    task: str
    # Timestamp of the run_started event.
    started_at: str
    # Unique identifier assigned by the run recorder.
    run_id: str
    # Filesystem path of the JSONL run log, or None when logging is disabled.
    runlog_path: str | None = None
    # Terminal status; defaults to "failed" until a finish event says otherwise.
    status: str = "failed"
    # Convenience flag: True iff status == "completed".
    completed: bool = False
    # Final assistant output text (may be empty on failure).
    final_output: str = ""
    # Provider-supplied finish reason, when one was reported.
    reason: str | None = None
    # Timestamp of the run_finished event; None if the run never finished.
    finished_at: str | None = None
    # Token/cost usage aggregated across all model turns.
    usage: Usage = field(default_factory=Usage)
    # Per-model-turn summaries, in order.
    steps: list[RunStep] = field(default_factory=list)
    # Per-tool-execution records, in order.
    tool_calls: list[RunToolCall] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Convert the report into plain data for CLI or API serialization.

        Returns:
            dict[str, Any]: Recursive ``dataclasses.asdict`` conversion of the
            report, including nested steps, tool calls, and usage.
        """
        return asdict(self)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class RunReportProjector(RunEventSink):
    """Project canonical run events into a :class:`RunReport`.

    The projector is an event sink that accumulates state from the event
    stream of a single run; :meth:`build` materializes that state into a
    report. Events of unrecognized kinds are silently ignored, keeping the
    projection forward-compatible with new event types.
    """

    def __init__(self) -> None:
        """Initialize empty projection state for a single run."""
        self._task = ""
        self._started_at = ""
        self._run_id = ""
        # Pessimistic default: a run only counts as anything other than
        # "failed" once a run_finished event says so.
        self._status = "failed"
        self._completed = False
        self._final_output = ""
        self._reason: str | None = None
        self._finished_at: str | None = None
        self._usage = Usage()
        self._steps: list[RunStep] = []
        self._tool_calls: list[RunToolCall] = []

    def consume(self, event: RunEvent) -> None:
        """Update the projection incrementally from one canonical run event.

        Args:
            event: A canonical run event; only ``run_started``,
                ``model_responded``, ``tool_executed``, and ``run_finished``
                kinds affect the projection.
        """
        if event.kind == "run_started":
            self._task = str(event.payload.get("task", ""))
            self._started_at = event.ts
            self._run_id = event.run_id
            return

        if event.kind == "model_responded":
            # Collect the names of tools requested during this model turn;
            # entries without a name (or of unexpected shape) are skipped.
            tool_call_names: list[str] = []
            requested_tools = event.payload.get("requested_tools", [])
            if isinstance(requested_tools, list):
                for item in requested_tools:
                    if isinstance(item, dict) and item.get("name") is not None:
                        tool_call_names.append(str(item["name"]))
            self._steps.append(
                RunStep(
                    step=event.step or 0,
                    assistant_text=str(event.payload.get("output_text", "")),
                    # tool_call_names is always a freshly built list here, so
                    # the original isinstance re-check and copy were redundant.
                    tool_calls=tool_call_names,
                    ts=event.ts,
                )
            )
            return

        if event.kind == "tool_executed":
            arguments = event.payload.get("arguments")
            self._tool_calls.append(
                RunToolCall(
                    step=event.step or 0,
                    call_id=str(event.payload.get("call_id", "")),
                    name=str(event.payload.get("name", "")),
                    arguments=dict(arguments) if isinstance(arguments, dict) else {},
                    is_error=bool(event.payload.get("is_error", False)),
                    output=event.payload.get("output"),
                    error=event.payload.get("error"),
                    model_payload=event.payload.get("model_payload"),
                    duration_ms=event.payload.get("duration_ms"),
                    ts=event.ts,
                )
            )
            return

        if event.kind == "run_finished":
            self._finished_at = event.ts
            self._status = str(event.payload.get("status", self._status))
            self._completed = self._status == "completed"
            # Only overwrite projected fields that the event actually carries.
            if "final_output" in event.payload:
                self._final_output = str(event.payload.get("final_output", ""))
            reason = event.payload.get("reason")
            if reason is not None:
                self._reason = str(reason)
            usage = event.payload.get("usage")
            if isinstance(usage, dict):
                self._usage = usage_from_payload(usage)

    def build(self, *, runlog_path: str | None = None) -> RunReport:
        """Materialize the current projection into an immutable report object.

        Args:
            runlog_path: Optional filesystem path of the JSONL run log,
                recorded verbatim on the report.

        Returns:
            RunReport: Snapshot of everything projected so far; internal lists
            are copied so later events cannot mutate the report.

        Raises:
            RuntimeError: If no ``run_started`` event has been consumed yet.
        """
        if not self._run_id or not self._started_at:
            raise RuntimeError("RunReportProjector has not received run_started.")

        return RunReport(
            task=self._task,
            started_at=self._started_at,
            run_id=self._run_id,
            runlog_path=runlog_path,
            status=self._status,
            completed=self._completed,
            final_output=self._final_output,
            reason=self._reason,
            finished_at=self._finished_at,
            usage=self._usage,
            steps=list(self._steps),
            tool_calls=list(self._tool_calls),
        )

    @property
    def step_count(self) -> int:
        """Return the number of model turns observed so far."""
        return len(self._steps)

    @property
    def tool_call_count(self) -> int:
        """Return the number of tool executions observed so far."""
        return len(self._tool_calls)
|