akernel-runtime 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. akernel_runtime-0.1.0.dist-info/METADATA +270 -0
  2. akernel_runtime-0.1.0.dist-info/RECORD +40 -0
  3. akernel_runtime-0.1.0.dist-info/WHEEL +5 -0
  4. akernel_runtime-0.1.0.dist-info/entry_points.txt +2 -0
  5. akernel_runtime-0.1.0.dist-info/licenses/LICENSE +201 -0
  6. akernel_runtime-0.1.0.dist-info/licenses/NOTICE +4 -0
  7. akernel_runtime-0.1.0.dist-info/top_level.txt +1 -0
  8. context_kernel/__init__.py +4 -0
  9. context_kernel/__main__.py +5 -0
  10. context_kernel/agent_reports.py +188 -0
  11. context_kernel/benchmarks.py +493 -0
  12. context_kernel/budget.py +72 -0
  13. context_kernel/cli.py +2953 -0
  14. context_kernel/context.py +161 -0
  15. context_kernel/evals.py +347 -0
  16. context_kernel/global_memory.py +126 -0
  17. context_kernel/loop.py +1617 -0
  18. context_kernel/marketplace.py +194 -0
  19. context_kernel/marketplace_data/skills/context_budget.json +27 -0
  20. context_kernel/marketplace_data/skills/context_compaction.json +27 -0
  21. context_kernel/marketplace_data/skills/edit_file.json +27 -0
  22. context_kernel/marketplace_data/skills/index.json +66 -0
  23. context_kernel/marketplace_data/skills/long_task_planning.json +27 -0
  24. context_kernel/marketplace_data/skills/multi_file_bugfix.json +28 -0
  25. context_kernel/memory.py +515 -0
  26. context_kernel/models.py +144 -0
  27. context_kernel/planner.py +155 -0
  28. context_kernel/policy.py +271 -0
  29. context_kernel/project.py +317 -0
  30. context_kernel/providers.py +1264 -0
  31. context_kernel/report_costs.py +375 -0
  32. context_kernel/runner.py +78 -0
  33. context_kernel/skills.py +318 -0
  34. context_kernel/state_writer.py +108 -0
  35. context_kernel/storage.py +171 -0
  36. context_kernel/tasks.py +549 -0
  37. context_kernel/text.py +42 -0
  38. context_kernel/tokenizer.py +22 -0
  39. context_kernel/tools.py +544 -0
  40. context_kernel/verifier.py +77 -0
@@ -0,0 +1,375 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
# Default for the ``token_tolerance`` argument of diff_cost_reports: token
# increases must exceed this many tokens before they are reported as regressions.
+ DEFAULT_COST_TOKEN_TOLERANCE = 10
7
+
8
+
9
def build_eval_cost_report(report: dict[str, Any]) -> dict[str, Any]:
    """Build a cost report of kind ``"eval"`` from an eval report.

    The report's ``"tasks"`` list (empty when the key is absent) supplies the
    items, and its ``"fixture"`` value, converted to ``str``, becomes the
    source label.
    """
    tasks = report.get("tasks", [])
    fixture_label = str(report.get("fixture", ""))
    return build_cost_report(kind="eval", report=report, items=tasks, source_label=fixture_label)
16
+
17
+
18
def build_benchmark_cost_report(report: dict[str, Any]) -> dict[str, Any]:
    """Build a cost report of kind ``"benchmark"`` from a benchmark report.

    Tasks from every entry in ``report["fixtures"]`` are flattened into one
    list. Each task is shallow-copied and tagged with a ``"fixture"`` key
    holding the final path component of that entry's ``"fixture"`` value.
    The report's ``"benchmark"`` value, converted to ``str``, becomes the
    source label.
    """
    flattened: list[dict[str, Any]] = []
    for entry in report.get("fixtures", []):
        short_name = Path(str(entry.get("fixture", ""))).name
        # Copy each task so the caller's report is not mutated by the tag.
        flattened.extend({**dict(task), "fixture": short_name} for task in entry.get("tasks", []))
    benchmark_label = str(report.get("benchmark", ""))
    return build_cost_report(kind="benchmark", report=report, items=flattened, source_label=benchmark_label)
32
+
33
+
34
def build_cost_report(*, kind: str, report: dict[str, Any], items: list[dict[str, Any]], source_label: str) -> dict[str, Any]:
    """Assemble the cost report dictionary shared by eval and benchmark reports.

    Args:
        kind: Label stored under ``"kind"``.
        report: Source report; its ``"id"``, ``"name"`` and ``"summary"``
            entries are read.
        items: Raw per-task entries, each converted with ``cost_item_snapshot``.
        source_label: Text stored under ``"source"``.

    Returns:
        A dict with the report identity, a normalised ``"summary"``, the five
        snapshots with the most kernel tokens (``"hotspots"``), the five with
        the lowest savings percent (``"low_savings"``) and all snapshots
        (``"items"``).
    """
    snapshots = [cost_item_snapshot(entry) for entry in items]

    # Both rankings are stable sorts, so ties keep their input order.
    by_kernel_desc = list(snapshots)
    by_kernel_desc.sort(key=lambda snap: snap["kernel_tokens"], reverse=True)
    by_savings_asc = list(snapshots)
    by_savings_asc.sort(key=lambda snap: snap["savings_percent"])

    totals = report.get("summary", {})

    def whole(key: str) -> int:
        # Missing, None and other falsy values all collapse to 0.
        return int(totals.get(key, 0) or 0)

    def real(key: str) -> float:
        return float(totals.get(key, 0) or 0)

    normalised = {
        "item_count": len(snapshots),
        "kernel_tokens": whole("total_kernel_tokens"),
        "baseline_tokens": whole("total_baseline_tokens"),
        "savings_tokens": whole("total_savings_tokens"),
        "savings_percent": real("total_savings_percent"),
        "average_savings_percent": real("average_savings_percent"),
        "execution_tokens": whole("total_execution_tokens"),
        "passed_checks": whole("passed_checks"),
        "total_checks": whole("total_checks"),
        "executed_items": whole("executed_tasks"),
        "blocked_items": whole("blocked_tasks"),
    }
    return {
        "kind": kind,
        "id": report.get("id"),
        "name": report.get("name"),
        "source": source_label,
        "summary": normalised,
        "hotspots": by_kernel_desc[:5],
        "low_savings": by_savings_asc[:5],
        "items": snapshots,
    }
61
+
62
+
63
def cost_item_snapshot(item: dict[str, Any]) -> dict[str, Any]:
    """Flatten one raw task entry into the cost fields used by the reports.

    The ``"kernel"``, ``"baseline"``, ``"savings"``, ``"execution"`` and
    ``"checks"`` sections are each read as dicts. A section that is missing,
    ``None`` or any other non-dict value is treated as empty, so a partially
    populated entry yields zeros instead of raising ``AttributeError``.

    Args:
        item: Raw task entry.

    Returns:
        A dict with ``id``, ``profile``, token counts, ``savings_percent``,
        a ``"passed/total"`` checks string and ``executed``/``blocked`` flags.
        A ``"fixture"`` key is added only when the entry has a truthy one.
    """

    def section(name: str) -> dict[str, Any]:
        # Apply to every section the guard the original gave only "execution".
        value = item.get(name)
        return value if isinstance(value, dict) else {}

    kernel = section("kernel")
    baseline = section("baseline")
    savings = section("savings")
    execution = section("execution")
    checks = section("checks")

    kernel_tokens = int(kernel.get("estimated_tokens", 0) or 0)
    baseline_tokens = int(baseline.get("estimated_tokens", 0) or 0)
    execution_tokens = int(execution.get("total_tokens", 0) or 0)
    # Savings are derived from the token counts and never go negative.
    savings_tokens = max(0, baseline_tokens - kernel_tokens)
    savings_percent = float(savings.get("percent", 0) or 0)
    blocked = bool(execution.get("blocked"))

    snapshot = {
        "id": item.get("id"),
        "profile": item.get("profile"),
        "kernel_tokens": kernel_tokens,
        "baseline_tokens": baseline_tokens,
        "savings_tokens": savings_tokens,
        "savings_percent": savings_percent,
        "execution_tokens": execution_tokens,
        "checks": f"{int(checks.get('passed', 0) or 0)}/{int(checks.get('total', 0) or 0)}",
        # An item counts as executed only if it has execution data and was not blocked.
        "executed": bool(execution) and not blocked,
        "blocked": blocked,
    }
    if item.get("fixture"):
        snapshot["fixture"] = item.get("fixture")
    return snapshot
89
+
90
+
91
def render_cost_report(cost: dict[str, Any]) -> str:
    """Render a cost report as plain text.

    The output is a header of summary lines, an optional one-line highlight
    for the first hotspot and the first low-savings entry, then a
    ``Hotspots`` and a ``Lowest Savings`` list with one line per entry.
    Trailing whitespace is stripped from the joined result.
    """
    totals = cost["summary"]

    def focus_line(label: str, entry: dict[str, Any]) -> str:
        # One-line highlight shared by the hotspot and weakest-savings entries.
        return (
            f"{label}: {cost_scope(entry)} kernel={entry['kernel_tokens']} baseline={entry['baseline_tokens']} "
            f"savings={entry['savings_percent']}% checks={entry['checks']}"
        )

    out: list[str] = []
    out.append(f"{cost['kind']}_cost: {cost['id']}")
    out.append(f"name: {cost.get('name', '')}")
    out.append(f"source: {cost.get('source', '')}")
    out.append(f"items: {totals['item_count']}")
    out.append(
        f"tokens: kernel={totals['kernel_tokens']} baseline={totals['baseline_tokens']} "
        f"savings={totals['savings_tokens']} ({totals['savings_percent']}%) "
        f"execution={totals['execution_tokens']}"
    )
    out.append(
        f"checks: passed={totals['passed_checks']}/{totals['total_checks']} "
        f"executed={totals['executed_items']} blocked={totals['blocked_items']}"
    )
    out.append(f"average_savings_percent: {totals['average_savings_percent']}")

    if cost["hotspots"]:
        out.append(focus_line("hotspot", cost["hotspots"][0]))
    if cost["low_savings"]:
        out.append(focus_line("weakest_savings", cost["low_savings"][0]))

    for heading, key in (("Hotspots", "hotspots"), ("Lowest Savings", "low_savings")):
        out.append("")
        out.append(heading)
        out.extend(cost_item_line(entry) for entry in cost[key])
    return "\n".join(out).rstrip()
132
+
133
+
134
def render_cost_markdown(cost: dict[str, Any]) -> str:
    """Render a cost report as Markdown.

    The output is a bullet list of summary figures followed by two tables,
    ``### Hotspots`` and ``### Lowest Savings``, with one row per entry.
    Trailing whitespace is stripped from the joined result.
    """
    table_header = [
        "| Scope | Kernel | Baseline | Savings | Checks | Execution |",
        "| --- | ---: | ---: | ---: | ---: | ---: |",
    ]

    def table_row(entry: dict[str, Any]) -> str:
        cells = [
            cost_scope(entry),
            str(entry["kernel_tokens"]),
            str(entry["baseline_tokens"]),
            f"{entry['savings_percent']}%",
            # The checks value is joined as-is, so it must already be a string.
            entry["checks"],
            str(entry["execution_tokens"]),
        ]
        return "| " + " | ".join(cells) + " |"

    totals = cost["summary"]
    out = [
        f"- Items: `{totals['item_count']}`",
        f"- Kernel tokens: `{totals['kernel_tokens']}`",
        f"- Baseline tokens: `{totals['baseline_tokens']}`",
        f"- Savings: `{totals['savings_tokens']}` tokens (`{totals['savings_percent']}%`)",
        f"- Execution tokens: `{totals['execution_tokens']}`",
        f"- Checks: `{totals['passed_checks']}/{totals['total_checks']}`",
        f"- Executed items: `{totals['executed_items']}`",
        f"- Blocked items: `{totals['blocked_items']}`",
    ]
    out += ["", "### Hotspots", "", *table_header]
    out += [table_row(entry) for entry in cost["hotspots"]]
    out += ["", "### Lowest Savings", "", *table_header]
    out += [table_row(entry) for entry in cost["low_savings"]]
    return "\n".join(out).rstrip()
183
+
184
+
185
def diff_cost_reports(
    before: dict[str, Any],
    after: dict[str, Any],
    *,
    token_tolerance: int = DEFAULT_COST_TOKEN_TOLERANCE,
) -> dict[str, Any]:
    """Compare two cost reports and flag cost regressions.

    A regression is recorded when any of the following holds:

    * summary ``execution_tokens`` grew by more than ``token_tolerance``;
    * ``kernel_tokens`` of the first hotspot grew by more than
      ``token_tolerance`` (the first entries of each report are compared,
      whatever their scope);
    * ``savings_percent`` of the first low-savings entry decreased at all;
    * an item present in both reports grew its ``execution_tokens`` by more
      than ``token_tolerance``.

    Args:
        before: Earlier cost report.
        after: Later cost report.
        token_tolerance: Token increase that is tolerated before a
            regression is reported.

    Returns:
        A dict with report references, summary deltas, the two focus changes,
        per-item diffs, the regression list and ``"ok"`` (true when there are
        no regressions).
    """
    old_totals = before.get("summary", {})
    new_totals = after.get("summary", {})

    def total_delta(key: str) -> float:
        return scalar_delta(old_totals, new_totals, key)

    item_diffs = diff_cost_items(before.get("items", []), after.get("items", []))
    execution_delta = int(total_delta("execution_tokens"))
    hotspot_change = focus_change(
        first_item(before.get("hotspots", [])),
        first_item(after.get("hotspots", [])),
        metric_key="kernel_tokens",
    )
    weakest_savings_change = focus_change(
        first_item(before.get("low_savings", [])),
        first_item(after.get("low_savings", [])),
        metric_key="savings_percent",
    )

    regressions: list[dict[str, Any]] = []

    if execution_delta > token_tolerance:
        regressions.append(
            {
                "kind": "execution_tokens",
                "message": f"execution tokens increased by {execution_delta}",
                "delta": execution_delta,
            }
        )

    if hotspot_change["metric_delta"] > token_tolerance:
        regressions.append(
            {
                "kind": "hotspot_kernel_tokens",
                "message": (
                    f"hotspot kernel tokens increased from {hotspot_change['before_value']} "
                    f"to {hotspot_change['after_value']}"
                ),
                "scope": hotspot_change["after_scope"] or hotspot_change["before_scope"],
                "delta": hotspot_change["metric_delta"],
            }
        )

    # Savings have no tolerance: any drop in the weakest entry is a regression.
    if weakest_savings_change["metric_delta"] < 0:
        regressions.append(
            {
                "kind": "weakest_savings_percent",
                "message": (
                    f"lowest savings dropped from {weakest_savings_change['before_value']}% "
                    f"to {weakest_savings_change['after_value']}%"
                ),
                "scope": weakest_savings_change["after_scope"] or weakest_savings_change["before_scope"],
                "delta": weakest_savings_change["metric_delta"],
            }
        )

    # Only "changed" diffs carry deltas; added and removed items are skipped.
    regressions.extend(
        {
            "kind": "item_execution_tokens",
            "message": f"{entry['scope']} execution tokens increased by {entry['execution_tokens_delta']}",
            "scope": entry["scope"],
            "delta": entry["execution_tokens_delta"],
        }
        for entry in item_diffs
        if entry["status"] == "changed" and entry["execution_tokens_delta"] > token_tolerance
    )

    summary_delta = {
        "items": int(total_delta("item_count")),
        "kernel_tokens": int(total_delta("kernel_tokens")),
        "baseline_tokens": int(total_delta("baseline_tokens")),
        "savings_tokens": int(total_delta("savings_tokens")),
        "savings_percent": round(total_delta("savings_percent"), 2),
        "average_savings_percent": round(total_delta("average_savings_percent"), 2),
        "execution_tokens": execution_delta,
        "passed_checks": int(total_delta("passed_checks")),
        "total_checks": int(total_delta("total_checks")),
    }
    return {
        "before": cost_report_ref(before),
        "after": cost_report_ref(after),
        "summary_delta": summary_delta,
        "hotspot_change": hotspot_change,
        "weakest_savings_change": weakest_savings_change,
        "items": item_diffs,
        "regressions": regressions,
        "ok": not regressions,
    }
273
+
274
+
275
def diff_cost_items(before_items: list[dict[str, Any]], after_items: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Diff two lists of item snapshots, matched by their ``cost_scope``.

    Returns one entry per scope in sorted scope order. A scope found only in
    ``after_items`` is reported as ``"added"`` and one found only in
    ``before_items`` as ``"removed"``. A scope found in both is reported as
    ``"changed"`` with per-metric deltas (after minus before) and compact
    copies of both sides. If a scope repeats within one list, its last
    occurrence is used.
    """
    old_by_scope = {cost_scope(entry): entry for entry in before_items}
    new_by_scope = {cost_scope(entry): entry for entry in after_items}

    result: list[dict[str, Any]] = []
    for scope in sorted(old_by_scope.keys() | new_by_scope.keys()):
        old = old_by_scope.get(scope)
        new = new_by_scope.get(scope)

        if old is None:
            result.append({"scope": scope, "status": "added"})
        elif new is None:
            result.append({"scope": scope, "status": "removed"})
        else:

            def int_delta(key: str, new: dict[str, Any] = new, old: dict[str, Any] = old) -> int:
                return int(new.get(key, 0) or 0) - int(old.get(key, 0) or 0)

            percent_delta = float(new.get("savings_percent", 0) or 0) - float(old.get("savings_percent", 0) or 0)
            result.append(
                {
                    "scope": scope,
                    "status": "changed",
                    "kernel_tokens_delta": int_delta("kernel_tokens"),
                    "baseline_tokens_delta": int_delta("baseline_tokens"),
                    "savings_tokens_delta": int_delta("savings_tokens"),
                    "savings_percent_delta": round(percent_delta, 2),
                    "execution_tokens_delta": int_delta("execution_tokens"),
                    "before": compact_cost_item(old),
                    "after": compact_cost_item(new),
                }
            )
    return result
303
+
304
+
305
def focus_change(before_item: dict[str, Any] | None, after_item: dict[str, Any] | None, *, metric_key: str) -> dict[str, Any]:
    """Describe how one metric moved between two focus items.

    Either item may be ``None`` or empty, in which case its scope is ``""``
    and its value comes from ``metric_value`` (``0.0`` for a falsy item).

    Returns:
        A dict with both scopes, both values, ``metric_key`` and
        ``metric_delta`` (after minus before, rounded to two decimals).
    """
    old_value = metric_value(before_item, metric_key)
    new_value = metric_value(after_item, metric_key)

    old_scope = ""
    if before_item:
        old_scope = cost_scope(before_item)
    new_scope = ""
    if after_item:
        new_scope = cost_scope(after_item)

    return {
        "before_scope": old_scope,
        "after_scope": new_scope,
        "before_value": old_value,
        "after_value": new_value,
        "metric_key": metric_key,
        "metric_delta": round(new_value - old_value, 2),
    }
316
+
317
+
318
def compact_cost_item(item: dict[str, Any]) -> dict[str, Any]:
    """Return the subset of a cost snapshot that is embedded in item diffs.

    Token counts are coerced to ``int`` and ``savings_percent`` to ``float``,
    with missing or falsy values becoming zero. ``checks`` is passed through
    unchanged (``None`` when absent).
    """

    def whole(key: str) -> int:
        return int(item.get(key, 0) or 0)

    compact: dict[str, Any] = {}
    compact["kernel_tokens"] = whole("kernel_tokens")
    compact["baseline_tokens"] = whole("baseline_tokens")
    compact["savings_percent"] = float(item.get("savings_percent", 0) or 0)
    compact["execution_tokens"] = whole("execution_tokens")
    compact["checks"] = item.get("checks")
    return compact
326
+
327
+
328
def cost_report_ref(cost: dict[str, Any]) -> dict[str, Any]:
    """Return the identifying fields of a cost report.

    The result holds ``kind``, ``id``, ``name`` and ``source`` in that order;
    a field missing from ``cost`` is ``None``.
    """
    identity_keys = ("kind", "id", "name", "source")
    return {key: cost.get(key) for key in identity_keys}
335
+
336
+
337
def first_item(items: list[dict[str, Any]]) -> dict[str, Any] | None:
    """Return the first element of ``items``, or ``None`` when it is empty."""
    if not items:
        return None
    return items[0]
339
+
340
+
341
def metric_value(item: dict[str, Any] | None, key: str) -> float:
    """Read ``item[key]`` as a float, falling back to ``0.0``.

    The fallback applies when ``item`` is ``None`` or empty, when the key is
    missing or falsy, and when the value cannot be converted to ``float``.
    """
    if item:
        try:
            return float(item.get(key, 0) or 0)
        except (TypeError, ValueError):
            # Unconvertible values are treated the same as a missing metric.
            pass
    return 0.0
348
+
349
+
350
def scalar_delta(before: dict[str, Any], after: dict[str, Any], key: str) -> float:
    """Return ``after[key] - before[key]`` as a float.

    Each side is read independently. A value that is missing, falsy or not
    convertible to ``float`` counts as ``0.0``.
    """

    def read(source: dict[str, Any]) -> float:
        try:
            return float(source.get(key, 0) or 0)
        except (TypeError, ValueError):
            return 0.0

    old_value = read(before)
    new_value = read(after)
    return new_value - old_value
360
+
361
+
362
def cost_scope(item: dict[str, Any]) -> str:
    """Return the display scope of an item.

    The scope is ``"<fixture>/<id>"`` when the item has a truthy ``"fixture"``,
    otherwise just the id. The id is converted with ``str`` and defaults to
    ``""`` when the key is absent.
    """
    prefix = item.get("fixture")
    identifier = str(item.get("id", ""))
    return f"{prefix}/{identifier}" if prefix else identifier
368
+
369
+
370
def cost_item_line(item: dict[str, Any]) -> str:
    """Format one item snapshot as a ``- scope: key=value ...`` list line."""
    scope = cost_scope(item)
    fields = [
        f"kernel={item['kernel_tokens']}",
        f"baseline={item['baseline_tokens']}",
        f"savings={item['savings_percent']}%",
        f"checks={item['checks']}",
        f"execution={item['execution_tokens']}",
    ]
    return f"- {scope}: " + " ".join(fields)
@@ -0,0 +1,78 @@
1
+ from __future__ import annotations
2
+
3
+ from copy import deepcopy
4
+ from typing import Any
5
+ from uuid import uuid4
6
+
7
+ from .context import ContextBuilder
8
+ from .models import utc_now
9
+ from .providers import get_provider
10
+ from .state_writer import StateWriter
11
+ from .storage import Workspace
12
+ from .verifier import combine_verifications, enforce_preflight, verify_response
13
+
14
+
15
class AgentRunner:
    """Run a single request through a provider and record the run as a trace."""

    def __init__(self, workspace: Workspace):
        # Workspace used to build context, write state and store traces.
        self.workspace = workspace

    def run(
        self,
        request: str,
        provider_name: str,
        budget: int | None,
        profile: str = "balanced",
        model: str | None = None,
        base_url: str | None = None,
        allow_over_budget: bool = False,
        expect_json: bool = False,
        remember: bool = False,
        task_id: str | None = None,
        resume: bool = False,
        packet_overrides: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Build a context packet, send it to the provider and persist the trace.

        Args:
            request: Request text, passed to the context builder and stored
                in the trace.
            provider_name: Name handed to ``get_provider``.
            budget: Budget handed to the context builder.
            profile: Profile handed to the context builder.
            model: Optional model handed to ``get_provider``.
            base_url: Optional base URL handed to ``get_provider``.
            allow_over_budget: Forwarded to ``enforce_preflight``.
            expect_json: Forwarded to ``verify_response``.
            remember: When true, the trace is passed to
                ``StateWriter.write_from_trace`` and its result replaces the
                default ``"state"`` entry.
            task_id: Forwarded to the context builder and stored in the trace.
            resume: Forwarded to the context builder and stored in the trace.
            packet_overrides: Optional dict merged over the built packet with
                ``merge_dicts`` before the preflight check.

        Returns:
            The trace dict, which is also written as ``<id>.json`` in the
            workspace traces directory.
        """
        builder = ContextBuilder(self.workspace)
        packet = builder.build(request, budget, profile, task_id=task_id, resume=resume)
        if packet_overrides:
            packet = merge_dicts(packet, packet_overrides)

        # The preflight check runs on the final packet, before the provider is created.
        preflight = enforce_preflight(packet, allow_over_budget=allow_over_budget)
        provider = get_provider(provider_name, model=model, base_url=base_url)
        reply = provider.run(packet)
        reply_check = verify_response(reply.text, expect_json=expect_json)

        trace: dict[str, Any] = {
            "id": uuid4().hex[:12],
            "created_at": utc_now(),
            "provider": provider.name,
            "model": getattr(provider, "model", None),
            "request": request,
            "task_id": task_id,
            "resume": resume,
            "context_packet": packet,
        }
        trace["response"] = {
            "text": reply.text,
            "input_tokens": reply.input_tokens,
            "output_tokens": reply.output_tokens,
            "total_tokens": reply.input_tokens + reply.output_tokens,
        }
        trace["verifier"] = combine_verifications("run", preflight, reply_check)
        # Default state entry; the state writer sees it when remember is true.
        trace["state"] = {"enabled": False, "candidate_count": 0, "written_count": 0, "records": []}

        if remember:
            trace["state"] = StateWriter(self.workspace).write_from_trace(trace)

        trace_path = self.workspace.traces_dir / f"{trace['id']}.json"
        Workspace.write_json(trace_path, trace)
        return trace
69
+
70
+
71
def merge_dicts(base: dict[str, Any], overrides: dict[str, Any]) -> dict[str, Any]:
    """Return a deep copy of ``base`` with ``overrides`` merged in recursively.

    When a key holds a dict on both sides the two dicts are merged; in every
    other case the override value replaces the base value. Neither input is
    mutated and the result shares no mutable objects with them.
    """
    result = deepcopy(base)
    for name, incoming in overrides.items():
        current = result.get(name)
        if not (isinstance(incoming, dict) and isinstance(current, dict)):
            # Replacement branch: scalars, lists, or a dict/non-dict mismatch.
            result[name] = deepcopy(incoming)
            continue
        result[name] = merge_dicts(current, incoming)
    return result