akernel-runtime 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- akernel_runtime-0.1.0.dist-info/METADATA +270 -0
- akernel_runtime-0.1.0.dist-info/RECORD +40 -0
- akernel_runtime-0.1.0.dist-info/WHEEL +5 -0
- akernel_runtime-0.1.0.dist-info/entry_points.txt +2 -0
- akernel_runtime-0.1.0.dist-info/licenses/LICENSE +201 -0
- akernel_runtime-0.1.0.dist-info/licenses/NOTICE +4 -0
- akernel_runtime-0.1.0.dist-info/top_level.txt +1 -0
- context_kernel/__init__.py +4 -0
- context_kernel/__main__.py +5 -0
- context_kernel/agent_reports.py +188 -0
- context_kernel/benchmarks.py +493 -0
- context_kernel/budget.py +72 -0
- context_kernel/cli.py +2953 -0
- context_kernel/context.py +161 -0
- context_kernel/evals.py +347 -0
- context_kernel/global_memory.py +126 -0
- context_kernel/loop.py +1617 -0
- context_kernel/marketplace.py +194 -0
- context_kernel/marketplace_data/skills/context_budget.json +27 -0
- context_kernel/marketplace_data/skills/context_compaction.json +27 -0
- context_kernel/marketplace_data/skills/edit_file.json +27 -0
- context_kernel/marketplace_data/skills/index.json +66 -0
- context_kernel/marketplace_data/skills/long_task_planning.json +27 -0
- context_kernel/marketplace_data/skills/multi_file_bugfix.json +28 -0
- context_kernel/memory.py +515 -0
- context_kernel/models.py +144 -0
- context_kernel/planner.py +155 -0
- context_kernel/policy.py +271 -0
- context_kernel/project.py +317 -0
- context_kernel/providers.py +1264 -0
- context_kernel/report_costs.py +375 -0
- context_kernel/runner.py +78 -0
- context_kernel/skills.py +318 -0
- context_kernel/state_writer.py +108 -0
- context_kernel/storage.py +171 -0
- context_kernel/tasks.py +549 -0
- context_kernel/text.py +42 -0
- context_kernel/tokenizer.py +22 -0
- context_kernel/tools.py +544 -0
- context_kernel/verifier.py +77 -0
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
DEFAULT_COST_TOKEN_TOLERANCE = 10
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def build_eval_cost_report(report: dict[str, Any]) -> dict[str, Any]:
    """Build a cost report of kind ``"eval"`` from an eval report.

    The report's ``"tasks"`` list (empty when the key is absent) supplies the
    items, and its ``"fixture"`` value, converted with ``str``, becomes the
    source label. All aggregation is delegated to ``build_cost_report``.
    """
    task_items = report.get("tasks", [])
    fixture_label = str(report.get("fixture", ""))
    return build_cost_report(
        kind="eval",
        report=report,
        items=task_items,
        source_label=fixture_label,
    )
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def build_benchmark_cost_report(report: dict[str, Any]) -> dict[str, Any]:
    """Build a cost report of kind ``"benchmark"`` from a benchmark report.

    Every task of every entry in ``report["fixtures"]`` is shallow-copied and
    tagged with a ``"fixture"`` key holding the final path component of that
    entry's ``"fixture"`` value. The flattened list is then handed to
    ``build_cost_report`` with the report's ``"benchmark"`` value as the
    source label.
    """
    flattened: list[dict[str, Any]] = []
    for fixture_entry in report.get("fixtures", []):
        # Only the last path component identifies the fixture in the report.
        label = Path(str(fixture_entry.get("fixture", ""))).name
        flattened.extend(
            dict(task, fixture=label) for task in fixture_entry.get("tasks", [])
        )
    benchmark_label = str(report.get("benchmark", ""))
    return build_cost_report(
        kind="benchmark",
        report=report,
        items=flattened,
        source_label=benchmark_label,
    )
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def build_cost_report(*, kind: str, report: dict[str, Any], items: list[dict[str, Any]], source_label: str) -> dict[str, Any]:
    """Assemble the cost report structure shared by eval and benchmark runs.

    Args:
        kind: Report kind stored under ``"kind"``.
        report: Source report; its ``"id"``, ``"name"`` and ``"summary"``
            entries are read.
        items: Per-task entries; each is reduced with ``cost_item_snapshot``.
        source_label: Value stored under ``"source"``.

    Returns:
        A dict with ``kind``, ``id``, ``name``, ``source``, a ``summary`` of
        totals, the five snapshots with the most kernel tokens
        (``hotspots``), the five with the lowest savings percent
        (``low_savings``) and every snapshot (``items``).
    """
    snapshots = list(map(cost_item_snapshot, items))
    heaviest = sorted(snapshots, key=lambda snap: snap["kernel_tokens"], reverse=True)
    weakest = sorted(snapshots, key=lambda snap: snap["savings_percent"])
    totals = report.get("summary", {})

    def whole(key: str) -> int:
        # Missing, None and other falsy totals all collapse to 0.
        return int(totals.get(key, 0) or 0)

    def fraction(key: str) -> float:
        return float(totals.get(key, 0) or 0)

    # Totals are taken from the report's own summary; they are not
    # recomputed from the snapshots.
    summary_block = {
        "item_count": len(snapshots),
        "kernel_tokens": whole("total_kernel_tokens"),
        "baseline_tokens": whole("total_baseline_tokens"),
        "savings_tokens": whole("total_savings_tokens"),
        "savings_percent": fraction("total_savings_percent"),
        "average_savings_percent": fraction("average_savings_percent"),
        "execution_tokens": whole("total_execution_tokens"),
        "passed_checks": whole("passed_checks"),
        "total_checks": whole("total_checks"),
        "executed_items": whole("executed_tasks"),
        "blocked_items": whole("blocked_tasks"),
    }
    return {
        "kind": kind,
        "id": report.get("id"),
        "name": report.get("name"),
        "source": source_label,
        "summary": summary_block,
        "hotspots": heaviest[:5],
        "low_savings": weakest[:5],
        "items": snapshots,
    }
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def cost_item_snapshot(item: dict[str, Any]) -> dict[str, Any]:
    """Reduce one task entry to a flat cost snapshot.

    The nested ``kernel``, ``baseline``, ``savings``, ``execution`` and
    ``checks`` sections are read from ``item``. Any section that is missing
    or is not a dict (for example ``None``) is treated as empty, so partial
    entries produce zeroed fields instead of raising ``AttributeError``.

    Args:
        item: Task entry as found in an eval or benchmark report.

    Returns:
        A dict with ``id``, ``profile``, ``kernel_tokens``,
        ``baseline_tokens``, ``savings_tokens``, ``savings_percent``,
        ``execution_tokens``, ``checks`` (``"passed/total"``), ``executed``
        and ``blocked``; ``fixture`` is included only when ``item`` carries a
        truthy one.
    """

    def section(name: str) -> dict[str, Any]:
        # Same guard for every nested section: anything but a dict is empty.
        value = item.get(name)
        return value if isinstance(value, dict) else {}

    kernel = section("kernel")
    baseline = section("baseline")
    savings = section("savings")
    execution = section("execution")
    checks = section("checks")

    kernel_tokens = int(kernel.get("estimated_tokens", 0) or 0)
    baseline_tokens = int(baseline.get("estimated_tokens", 0) or 0)
    blocked = bool(execution.get("blocked"))

    snapshot = {
        "id": item.get("id"),
        "profile": item.get("profile"),
        "kernel_tokens": kernel_tokens,
        "baseline_tokens": baseline_tokens,
        # Token savings are derived from the two estimates and never go
        # negative; the percentage is taken from the entry as reported.
        "savings_tokens": max(0, baseline_tokens - kernel_tokens),
        "savings_percent": float(savings.get("percent", 0) or 0),
        "execution_tokens": int(execution.get("total_tokens", 0) or 0),
        "checks": f"{int(checks.get('passed', 0) or 0)}/{int(checks.get('total', 0) or 0)}",
        # An entry counts as executed only when it has execution data and
        # that data is not flagged as blocked.
        "executed": bool(execution) and not blocked,
        "blocked": blocked,
    }
    fixture = item.get("fixture")
    if fixture:
        snapshot["fixture"] = fixture
    return snapshot
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def render_cost_report(cost: dict[str, Any]) -> str:
    """Render a cost report as plain text.

    The output starts with header lines (kind and id, name, source, item
    count, token totals, check totals, average savings). When present, the
    first hotspot and the first low-savings entry each get a one-line
    highlight. The "Hotspots" and "Lowest Savings" sections follow, one
    ``cost_item_line`` per entry. Trailing whitespace is stripped.
    """
    totals = cost["summary"]

    def highlight(label: str, entry: dict[str, Any]) -> str:
        scope = cost_scope(entry)
        return (
            f"{label}: {scope} kernel={entry['kernel_tokens']} baseline={entry['baseline_tokens']} "
            f"savings={entry['savings_percent']}% checks={entry['checks']}"
        )

    out = [
        f"{cost['kind']}_cost: {cost['id']}",
        f"name: {cost.get('name', '')}",
        f"source: {cost.get('source', '')}",
        f"items: {totals['item_count']}",
        (
            f"tokens: kernel={totals['kernel_tokens']} baseline={totals['baseline_tokens']} "
            f"savings={totals['savings_tokens']} ({totals['savings_percent']}%) "
            f"execution={totals['execution_tokens']}"
        ),
        (
            f"checks: passed={totals['passed_checks']}/{totals['total_checks']} "
            f"executed={totals['executed_items']} blocked={totals['blocked_items']}"
        ),
        f"average_savings_percent: {totals['average_savings_percent']}",
    ]

    # One-line highlights for the top entry of each ranking, when there is one.
    for label, ranking_key in (("hotspot", "hotspots"), ("weakest_savings", "low_savings")):
        ranking = cost[ranking_key]
        if ranking:
            out.append(highlight(label, ranking[0]))

    # Section headers are emitted even when a ranking is empty.
    for title, ranking_key in (("Hotspots", "hotspots"), ("Lowest Savings", "low_savings")):
        out.append("")
        out.append(title)
        out.extend(cost_item_line(entry) for entry in cost[ranking_key])

    return "\n".join(out).rstrip()
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def render_cost_markdown(cost: dict[str, Any]) -> str:
    """Render a cost report as a Markdown fragment.

    Emits a bullet list of the summary totals, then a "Hotspots" table and a
    "Lowest Savings" table with identical columns (scope, kernel, baseline,
    savings, checks, execution). Trailing whitespace is stripped.
    """
    totals = cost["summary"]
    table_header = "| Scope | Kernel | Baseline | Savings | Checks | Execution |"
    table_rule = "| --- | ---: | ---: | ---: | ---: | ---: |"

    def table_row(entry: dict[str, Any]) -> str:
        cells = [
            cost_scope(entry),
            str(entry["kernel_tokens"]),
            str(entry["baseline_tokens"]),
            f"{entry['savings_percent']}%",
            entry["checks"],
            str(entry["execution_tokens"]),
        ]
        return "| " + " | ".join(cells) + " |"

    out = [
        f"- Items: `{totals['item_count']}`",
        f"- Kernel tokens: `{totals['kernel_tokens']}`",
        f"- Baseline tokens: `{totals['baseline_tokens']}`",
        f"- Savings: `{totals['savings_tokens']}` tokens (`{totals['savings_percent']}%`)",
        f"- Execution tokens: `{totals['execution_tokens']}`",
        f"- Checks: `{totals['passed_checks']}/{totals['total_checks']}`",
        f"- Executed items: `{totals['executed_items']}`",
        f"- Blocked items: `{totals['blocked_items']}`",
        "",
        "### Hotspots",
        "",
        table_header,
        table_rule,
    ]
    out.extend(table_row(entry) for entry in cost["hotspots"])
    out.extend(["", "### Lowest Savings", "", table_header, table_rule])
    out.extend(table_row(entry) for entry in cost["low_savings"])
    return "\n".join(out).rstrip()
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def diff_cost_reports(
    before: dict[str, Any],
    after: dict[str, Any],
    *,
    token_tolerance: int = DEFAULT_COST_TOKEN_TOLERANCE,
) -> dict[str, Any]:
    """Compare two cost reports and flag cost regressions.

    A regression is recorded when:
      * total execution tokens grew by more than ``token_tolerance``;
      * the kernel tokens of the first hotspot grew by more than
        ``token_tolerance`` (first entry of each report, whatever its scope);
      * the savings percent of the first low-savings entry dropped at all;
      * any item present in both reports grew its execution tokens by more
        than ``token_tolerance``.

    Args:
        before: Earlier cost report.
        after: Later cost report.
        token_tolerance: Token increase tolerated before a regression is
            reported.

    Returns:
        A dict with references to both reports, ``summary_delta``,
        ``hotspot_change``, ``weakest_savings_change``, per-item diffs under
        ``items``, the ``regressions`` list and ``ok`` (true when that list
        is empty).
    """
    totals_before = before.get("summary", {})
    totals_after = after.get("summary", {})
    top_before = first_item(before.get("hotspots", []))
    top_after = first_item(after.get("hotspots", []))
    weak_before = first_item(before.get("low_savings", []))
    weak_after = first_item(after.get("low_savings", []))
    item_diffs = diff_cost_items(before.get("items", []), after.get("items", []))

    def whole_delta(key: str) -> int:
        return int(scalar_delta(totals_before, totals_after, key))

    def rounded_delta(key: str) -> float:
        return round(scalar_delta(totals_before, totals_after, key), 2)

    regressions: list[dict[str, Any]] = []

    execution_delta = whole_delta("execution_tokens")
    if execution_delta > token_tolerance:
        regressions.append(
            {
                "kind": "execution_tokens",
                "message": f"execution tokens increased by {execution_delta}",
                "delta": execution_delta,
            }
        )

    hotspot_change = focus_change(top_before, top_after, metric_key="kernel_tokens")
    if hotspot_change["metric_delta"] > token_tolerance:
        hotspot_message = (
            f"hotspot kernel tokens increased from {hotspot_change['before_value']} "
            f"to {hotspot_change['after_value']}"
        )
        regressions.append(
            {
                "kind": "hotspot_kernel_tokens",
                "message": hotspot_message,
                "scope": hotspot_change["after_scope"] or hotspot_change["before_scope"],
                "delta": hotspot_change["metric_delta"],
            }
        )

    weakest_savings_change = focus_change(weak_before, weak_after, metric_key="savings_percent")
    # Any drop in the weakest savings percent counts; no tolerance applies.
    if weakest_savings_change["metric_delta"] < 0:
        weakest_message = (
            f"lowest savings dropped from {weakest_savings_change['before_value']}% "
            f"to {weakest_savings_change['after_value']}%"
        )
        regressions.append(
            {
                "kind": "weakest_savings_percent",
                "message": weakest_message,
                "scope": weakest_savings_change["after_scope"] or weakest_savings_change["before_scope"],
                "delta": weakest_savings_change["metric_delta"],
            }
        )

    # Added or removed items carry no deltas, so only "changed" entries count.
    regressions.extend(
        {
            "kind": "item_execution_tokens",
            "message": f"{entry['scope']} execution tokens increased by {entry['execution_tokens_delta']}",
            "scope": entry["scope"],
            "delta": entry["execution_tokens_delta"],
        }
        for entry in item_diffs
        if entry["status"] == "changed" and entry["execution_tokens_delta"] > token_tolerance
    )

    summary_delta = {
        "items": whole_delta("item_count"),
        "kernel_tokens": whole_delta("kernel_tokens"),
        "baseline_tokens": whole_delta("baseline_tokens"),
        "savings_tokens": whole_delta("savings_tokens"),
        "savings_percent": rounded_delta("savings_percent"),
        "average_savings_percent": rounded_delta("average_savings_percent"),
        "execution_tokens": execution_delta,
        "passed_checks": whole_delta("passed_checks"),
        "total_checks": whole_delta("total_checks"),
    }
    return {
        "before": cost_report_ref(before),
        "after": cost_report_ref(after),
        "summary_delta": summary_delta,
        "hotspot_change": hotspot_change,
        "weakest_savings_change": weakest_savings_change,
        "items": item_diffs,
        "regressions": regressions,
        "ok": not regressions,
    }
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def diff_cost_items(before_items: list[dict[str, Any]], after_items: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Diff two lists of cost snapshots, matched by ``cost_scope``.

    Returns one entry per scope, in sorted scope order. A scope present on
    one side only is reported as ``"added"`` or ``"removed"`` with no deltas.
    A scope present on both sides is reported as ``"changed"`` with token and
    savings deltas plus compact before/after copies. When several items share
    a scope, the last one in each list is used.
    """
    old_by_scope = {cost_scope(entry): entry for entry in before_items}
    new_by_scope = {cost_scope(entry): entry for entry in after_items}

    def token_delta(key: str, old: dict[str, Any], new: dict[str, Any]) -> int:
        return int(new.get(key, 0) or 0) - int(old.get(key, 0) or 0)

    results: list[dict[str, Any]] = []
    for scope in sorted(old_by_scope.keys() | new_by_scope.keys()):
        old = old_by_scope.get(scope)
        new = new_by_scope.get(scope)
        if old is None:
            results.append({"scope": scope, "status": "added"})
        elif new is None:
            results.append({"scope": scope, "status": "removed"})
        else:
            percent_delta = float(new.get("savings_percent", 0) or 0) - float(old.get("savings_percent", 0) or 0)
            results.append(
                {
                    "scope": scope,
                    "status": "changed",
                    "kernel_tokens_delta": token_delta("kernel_tokens", old, new),
                    "baseline_tokens_delta": token_delta("baseline_tokens", old, new),
                    "savings_tokens_delta": token_delta("savings_tokens", old, new),
                    "savings_percent_delta": round(percent_delta, 2),
                    "execution_tokens_delta": token_delta("execution_tokens", old, new),
                    "before": compact_cost_item(old),
                    "after": compact_cost_item(new),
                }
            )
    return results
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def focus_change(before_item: dict[str, Any] | None, after_item: dict[str, Any] | None, *, metric_key: str) -> dict[str, Any]:
    """Describe how one metric moved between two optional items.

    A missing or empty item contributes an empty scope and a value of 0.0.
    The result holds both scopes, both values, the metric key and the
    after-minus-before delta rounded to two decimals.
    """

    def scope_of(entry: dict[str, Any] | None) -> str:
        return cost_scope(entry) if entry else ""

    old_value = metric_value(before_item, metric_key)
    new_value = metric_value(after_item, metric_key)
    change = round(new_value - old_value, 2)
    return {
        "before_scope": scope_of(before_item),
        "after_scope": scope_of(after_item),
        "before_value": old_value,
        "after_value": new_value,
        "metric_key": metric_key,
        "metric_delta": change,
    }
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def compact_cost_item(item: dict[str, Any]) -> dict[str, Any]:
    """Return the token, savings and checks fields of a cost snapshot.

    Token counts are coerced to ``int`` and the savings percent to ``float``;
    missing or falsy values become 0. ``checks`` is passed through unchanged
    (``None`` when absent).
    """

    def whole(key: str) -> int:
        return int(item.get(key, 0) or 0)

    return {
        "kernel_tokens": whole("kernel_tokens"),
        "baseline_tokens": whole("baseline_tokens"),
        "savings_percent": float(item.get("savings_percent", 0) or 0),
        "execution_tokens": whole("execution_tokens"),
        "checks": item.get("checks"),
    }
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def cost_report_ref(cost: dict[str, Any]) -> dict[str, Any]:
    """Return the identifying fields of a cost report.

    Copies ``kind``, ``id``, ``name`` and ``source``; a field missing from
    ``cost`` is reported as ``None``.
    """
    identity_fields = ("kind", "id", "name", "source")
    return {field: cost.get(field) for field in identity_fields}
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def first_item(items: list[dict[str, Any]]) -> dict[str, Any] | None:
    """Return the first element of ``items``, or ``None`` when it is empty."""
    if not items:
        return None
    return items[0]
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def metric_value(item: dict[str, Any] | None, key: str) -> float:
    """Read ``item[key]`` as a float, falling back to 0.0.

    The fallback applies when ``item`` is ``None`` or empty, when the value
    is missing or falsy, and when it cannot be converted to ``float``.
    """
    value = 0.0
    if item:
        try:
            value = float(item.get(key, 0) or 0)
        except (TypeError, ValueError):
            # Non-numeric values count as zero rather than aborting.
            value = 0.0
    return value
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def scalar_delta(before: dict[str, Any], after: dict[str, Any], key: str) -> float:
    """Return ``after[key] - before[key]`` as a float.

    On each side independently, a missing, falsy or non-numeric value is
    treated as 0.0.
    """
    readings: list[float] = []
    for source in (before, after):
        try:
            reading = float(source.get(key, 0) or 0)
        except (TypeError, ValueError):
            reading = 0.0
        readings.append(reading)
    old_value, new_value = readings
    return new_value - old_value
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def cost_scope(item: dict[str, Any]) -> str:
    """Return the display/lookup scope of a cost item.

    The scope is ``"<fixture>/<id>"`` when the item has a truthy ``fixture``
    and just ``"<id>"`` otherwise. A missing or ``None`` id yields an empty
    id part. Snapshots always carry an ``"id"`` key, possibly ``None``, so
    without the explicit check the scope would render as the literal
    ``"None"``.

    Args:
        item: Cost snapshot or task entry.

    Returns:
        The scope string.
    """
    raw_id = item.get("id")
    task_id = "" if raw_id is None else str(raw_id)
    fixture = item.get("fixture")
    return f"{fixture}/{task_id}" if fixture else task_id
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def cost_item_line(item: dict[str, Any]) -> str:
    """Format one cost snapshot as a ``- scope: key=value ...`` text line."""
    label = cost_scope(item)
    fields = (
        f"kernel={item['kernel_tokens']}",
        f"baseline={item['baseline_tokens']}",
        f"savings={item['savings_percent']}%",
        f"checks={item['checks']}",
        f"execution={item['execution_tokens']}",
    )
    return f"- {label}: " + " ".join(fields)
|
context_kernel/runner.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from copy import deepcopy
|
|
4
|
+
from typing import Any
|
|
5
|
+
from uuid import uuid4
|
|
6
|
+
|
|
7
|
+
from .context import ContextBuilder
|
|
8
|
+
from .models import utc_now
|
|
9
|
+
from .providers import get_provider
|
|
10
|
+
from .state_writer import StateWriter
|
|
11
|
+
from .storage import Workspace
|
|
12
|
+
from .verifier import combine_verifications, enforce_preflight, verify_response
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class AgentRunner:
    """Run a single request against a provider and record the trace."""

    def __init__(self, workspace: Workspace):
        # Workspace used for context building, state writing and trace storage.
        self.workspace = workspace

    def run(
        self,
        request: str,
        provider_name: str,
        budget: int | None,
        profile: str = "balanced",
        model: str | None = None,
        base_url: str | None = None,
        allow_over_budget: bool = False,
        expect_json: bool = False,
        remember: bool = False,
        task_id: str | None = None,
        resume: bool = False,
        packet_overrides: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Build a context packet, call the provider and write the trace.

        Steps, in order: build the packet with ``ContextBuilder``; merge
        ``packet_overrides`` into it when given; run ``enforce_preflight``;
        resolve the provider and run it on the packet; verify the response
        text; assemble the trace; optionally write state from the trace; pass
        the trace to ``Workspace.write_json`` under ``workspace.traces_dir``.

        Args:
            request: Request text forwarded to the context builder.
            provider_name: Name passed to ``get_provider``.
            budget: Budget forwarded to the context builder.
            profile: Profile forwarded to the context builder.
            model: Optional model passed to ``get_provider``.
            base_url: Optional base URL passed to ``get_provider``.
            allow_over_budget: Forwarded to ``enforce_preflight``.
            expect_json: Forwarded to ``verify_response``.
            remember: When true, ``StateWriter.write_from_trace`` is called
                and its result replaces the trace's ``"state"`` entry.
            task_id: Optional task id forwarded to the context builder.
            resume: Resume flag forwarded to the context builder.
            packet_overrides: Optional dict merged over the built packet
                with ``merge_dicts``.

        Returns:
            The trace dict that was written.
        """
        builder = ContextBuilder(self.workspace)
        packet = builder.build(request, budget, profile, task_id=task_id, resume=resume)
        if packet_overrides:
            packet = merge_dicts(packet, packet_overrides)

        # NOTE(review): presumably enforce_preflight rejects over-budget
        # packets unless allowed; its behavior is not visible here.
        preflight = enforce_preflight(packet, allow_over_budget=allow_over_budget)
        provider = get_provider(provider_name, model=model, base_url=base_url)
        response = provider.run(packet)
        response_verifier = verify_response(response.text, expect_json=expect_json)

        # State is reported as disabled unless `remember` replaces it below.
        disabled_state = {"enabled": False, "candidate_count": 0, "written_count": 0, "records": []}
        trace: dict[str, Any] = {
            "id": uuid4().hex[:12],
            "created_at": utc_now(),
            "provider": provider.name,
            "model": getattr(provider, "model", None),
            "request": request,
            "task_id": task_id,
            "resume": resume,
            "context_packet": packet,
            "response": {
                "text": response.text,
                "input_tokens": response.input_tokens,
                "output_tokens": response.output_tokens,
                "total_tokens": response.input_tokens + response.output_tokens,
            },
            "verifier": combine_verifications("run", preflight, response_verifier),
            "state": disabled_state,
        }
        if remember:
            trace["state"] = StateWriter(self.workspace).write_from_trace(trace)

        trace_path = self.workspace.traces_dir / f"{trace['id']}.json"
        Workspace.write_json(trace_path, trace)
        return trace
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def merge_dicts(base: dict[str, Any], overrides: dict[str, Any]) -> dict[str, Any]:
    """Return a deep copy of ``base`` with ``overrides`` merged in recursively.

    Where both sides hold a dict under the same key, the two are merged
    key by key. In every other case the override value, deep-copied, replaces
    the base value. Neither argument is modified.
    """
    result = deepcopy(base)
    for key in overrides:
        incoming = overrides[key]
        if not isinstance(incoming, dict) or not isinstance(result.get(key), dict):
            # Scalars, lists and type mismatches: the override wins outright.
            result[key] = deepcopy(incoming)
            continue
        result[key] = merge_dicts(result[key], incoming)
    return result
|