akernel-runtime 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- akernel_runtime-0.1.0.dist-info/METADATA +270 -0
- akernel_runtime-0.1.0.dist-info/RECORD +40 -0
- akernel_runtime-0.1.0.dist-info/WHEEL +5 -0
- akernel_runtime-0.1.0.dist-info/entry_points.txt +2 -0
- akernel_runtime-0.1.0.dist-info/licenses/LICENSE +201 -0
- akernel_runtime-0.1.0.dist-info/licenses/NOTICE +4 -0
- akernel_runtime-0.1.0.dist-info/top_level.txt +1 -0
- context_kernel/__init__.py +4 -0
- context_kernel/__main__.py +5 -0
- context_kernel/agent_reports.py +188 -0
- context_kernel/benchmarks.py +493 -0
- context_kernel/budget.py +72 -0
- context_kernel/cli.py +2953 -0
- context_kernel/context.py +161 -0
- context_kernel/evals.py +347 -0
- context_kernel/global_memory.py +126 -0
- context_kernel/loop.py +1617 -0
- context_kernel/marketplace.py +194 -0
- context_kernel/marketplace_data/skills/context_budget.json +27 -0
- context_kernel/marketplace_data/skills/context_compaction.json +27 -0
- context_kernel/marketplace_data/skills/edit_file.json +27 -0
- context_kernel/marketplace_data/skills/index.json +66 -0
- context_kernel/marketplace_data/skills/long_task_planning.json +27 -0
- context_kernel/marketplace_data/skills/multi_file_bugfix.json +28 -0
- context_kernel/memory.py +515 -0
- context_kernel/models.py +144 -0
- context_kernel/planner.py +155 -0
- context_kernel/policy.py +271 -0
- context_kernel/project.py +317 -0
- context_kernel/providers.py +1264 -0
- context_kernel/report_costs.py +375 -0
- context_kernel/runner.py +78 -0
- context_kernel/skills.py +318 -0
- context_kernel/state_writer.py +108 -0
- context_kernel/storage.py +171 -0
- context_kernel/tasks.py +549 -0
- context_kernel/text.py +42 -0
- context_kernel/tokenizer.py +22 -0
- context_kernel/tools.py +544 -0
- context_kernel/verifier.py +77 -0
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from .budget import allocate_budget
|
|
6
|
+
from .memory import MemoryStore
|
|
7
|
+
from .policy import summarize_command_policy
|
|
8
|
+
from .project import compact_project_profile, load_project_profile
|
|
9
|
+
from .skills import SkillRegistry
|
|
10
|
+
from .storage import Workspace
|
|
11
|
+
from .tasks import TaskStore
|
|
12
|
+
from .tokenizer import estimate_tokens
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ContextBuilder:
    """Build context packets for a request out of workspace state.

    A packet combines the runtime configuration, an optional task brief,
    memory search results and selected skill contracts. The builder can also
    produce a "load everything" baseline packet and compare the two.
    """

    def __init__(self, workspace: Workspace):
        """Bind the builder to *workspace* and open its memory and skill stores."""
        self.workspace = workspace
        self.memory = MemoryStore(workspace)
        self.skills = SkillRegistry(workspace)

    def build(
        self,
        request: str,
        total_budget: int | None,
        profile: str = "balanced",
        task_id: str | None = None,
        resume: bool = False,
    ) -> dict[str, Any]:
        """Build the budgeted context packet for *request*.

        Args:
            request: The user request the packet is built for.
            total_budget: Token budget handed to ``allocate_budget``.
            profile: Budget profile name handed to ``allocate_budget``.
            task_id: Task whose brief should be loaded when resuming.
            resume: Whether the caller asked to resume ``task_id``.

        Returns:
            A dict with ``request``, ``runtime``, ``task``, ``memory``,
            ``skills``, ``budget`` and ``omissions`` entries. The
            ``estimated_used`` figure is computed before the ``budget`` and
            ``omissions`` entries are attached, so it does not include them.
        """
        budget = allocate_budget(request, total_budget, profile)
        config = self.workspace.load_config()
        runtime_instructions = config.get("runtime_instructions", [])
        # A brief is only looked up when a task id is given AND resume is requested.
        task_brief = TaskStore(self.workspace).brief(task_id) if task_id and resume else None

        selected_memory = self.memory.search(request, limit=6, budget_tokens=budget.memory)
        selected_skills = self.skills.select(request, budget_tokens=budget.skills)

        packet: dict[str, Any] = {
            "request": request,
            "runtime": {
                "instructions": runtime_instructions,
                "budget_policy": "Load the smallest useful context packet. Escalate skill levels only when needed.",
                "command_policy": summarize_command_policy(self.workspace),
                "project": compact_project_profile(load_project_profile(self.workspace)),
            },
            "task": {
                # Reflects whether a brief was actually loaded, not what was requested.
                "resume": bool(task_brief),
                "brief": task_brief,
            },
        }

        memory_entries = []
        for item in selected_memory:
            # Serialize once and reuse the result for both the payload and the estimate.
            record = item.record.to_dict()
            memory_entries.append(
                {
                    "record": record,
                    "score": item.score,
                    "reason": item.reason,
                    "matched_terms": item.matched_terms,
                    "estimated_tokens": estimate_tokens(record),
                }
            )
        packet["memory"] = memory_entries

        skill_entries = []
        for item in selected_skills:
            # Render once and reuse the result for both the estimate and the payload.
            contract = item.skill.render_level(item.level)
            skill_entries.append(
                {
                    "level": item.level,
                    "score": item.score,
                    "reason": item.reason,
                    "matched_terms": item.matched_terms,
                    "estimated_tokens": estimate_tokens(contract),
                    "contract": contract,
                }
            )
        packet["skills"] = skill_entries

        used = estimate_tokens(packet)
        packet["budget"] = {
            "profile": budget.profile,
            "total": budget.total,
            "allocated": {
                "request": budget.request,
                "runtime": budget.runtime,
                "memory": budget.memory,
                "skills": budget.skills,
                "reserve": budget.reserve,
            },
            "estimated_used": used,
            "estimated_remaining": max(0, budget.total - used),
            "over_budget": used > budget.total,
        }
        # packet["task"]["resume"] is only true when a brief exists, so the
        # caller's own flag must be forwarded for the "resume requested but no
        # brief" omission to be detectable at all.
        packet["omissions"] = self._omissions(packet, resume_requested=resume)
        return packet

    def build_baseline(self, request: str) -> dict[str, Any]:
        """Build the naive baseline packet: all memory and every skill at level ``l3``."""
        config = self.workspace.load_config()
        packet = {
            "request": request,
            "runtime": {
                "instructions": config.get("runtime_instructions", []),
                "budget_policy": "Naive baseline loads all memory and full skill procedures.",
                "command_policy": summarize_command_policy(self.workspace),
                "project": compact_project_profile(load_project_profile(self.workspace)),
            },
            "memory": [record.to_dict() for record in self.memory.all()],
            "skills": [skill.render_level("l3") for skill in self.skills.all()],
        }
        packet["budget"] = {
            "estimated_used": estimate_tokens(packet),
            "memory_count": len(packet["memory"]),
            "skill_count": len(packet["skills"]),
            "skill_level": "l3",
        }
        return packet

    def compare(
        self,
        request: str,
        total_budget: int | None,
        profile: str = "balanced",
        task_id: str | None = None,
        resume: bool = False,
    ) -> dict[str, Any]:
        """Build both packets for *request* and report the estimated token savings.

        Returns:
            A dict holding the kernel packet, the baseline packet and a
            ``savings`` section. Savings are clamped at zero, and the ratio is
            ``0.0`` when the baseline estimate is zero.
        """
        kernel = self.build(request, total_budget, profile, task_id=task_id, resume=resume)
        baseline = self.build_baseline(request)
        kernel_tokens = kernel["budget"]["estimated_used"]
        baseline_tokens = baseline["budget"]["estimated_used"]
        savings = max(0, baseline_tokens - kernel_tokens)
        savings_ratio = savings / baseline_tokens if baseline_tokens else 0.0
        return {
            "request": request,
            "budget": kernel["budget"]["total"],
            "profile": kernel["budget"]["profile"],
            "kernel": {
                "estimated_tokens": kernel_tokens,
                "selected_memory": len(kernel["memory"]),
                "selected_skills": len(kernel["skills"]),
                "over_budget": kernel["budget"]["over_budget"],
                "packet": kernel,
            },
            "baseline": {
                "estimated_tokens": baseline_tokens,
                "loaded_memory": baseline["budget"]["memory_count"],
                "loaded_skills": baseline["budget"]["skill_count"],
                "skill_level": baseline["budget"]["skill_level"],
                "packet": baseline,
            },
            "savings": {
                "estimated_tokens": savings,
                "ratio": round(savings_ratio, 4),
                "percent": round(savings_ratio * 100, 2),
            },
        }

    @staticmethod
    def _omissions(packet: dict[str, Any], resume_requested: bool = False) -> list[str]:
        """List human-readable notes about what the packet is missing.

        Args:
            packet: A packet that already has ``memory``, ``skills`` and
                ``budget`` entries.
            resume_requested: Whether the caller asked to resume a task. It is
                passed separately because the packet's own ``task.resume``
                value is derived from the brief and so cannot signal a
                missing one.

        Returns:
            The omission messages, in a fixed order; empty when nothing is missing.
        """
        omissions: list[str] = []
        task = packet.get("task", {})
        if (resume_requested or task.get("resume")) and not task.get("brief"):
            omissions.append("Task resume was requested but no task brief was loaded.")
        if not packet["memory"]:
            omissions.append("No relevant memory matched the request.")
        if not packet["skills"]:
            omissions.append("No skill contract matched the request.")
        if packet["budget"]["over_budget"]:
            omissions.append("Context packet exceeded the requested budget estimate.")
        return omissions
|
context_kernel/evals.py
ADDED
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
from uuid import uuid4
|
|
6
|
+
|
|
7
|
+
from .budget import DEFAULT_PROFILE
|
|
8
|
+
from .context import ContextBuilder
|
|
9
|
+
from .models import utc_now
|
|
10
|
+
from .providers import get_provider
|
|
11
|
+
from .report_costs import build_eval_cost_report, diff_cost_reports
|
|
12
|
+
from .storage import Workspace
|
|
13
|
+
from .verifier import combine_verifications, verify_preflight, verify_response
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Largest per-task kernel token increase that diff_reports accepts before it
# counts the task as a regression.
REGRESSION_TOKEN_TOLERANCE = 10
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class EvalRunner:
    """Run eval fixtures for a workspace and manage the saved eval reports."""

    def __init__(self, workspace: Workspace):
        """Bind the runner to *workspace* and create its context builder."""
        self.workspace = workspace
        self.context = ContextBuilder(workspace)

    def run_fixture(
        self,
        path: Path,
        default_budget: int | None = None,
        default_profile: str = DEFAULT_PROFILE,
        save: bool = True,
        execute_provider: str | None = None,
        execute_model: str | None = None,
        execute_base_url: str | None = None,
    ) -> dict[str, Any]:
        """Run every task of the fixture at *path* and return the combined report.

        Raises:
            ValueError: If the fixture has no non-empty ``tasks`` list.
        """
        fixture = Workspace.read_json(path)
        task_specs = fixture.get("tasks", [])
        if not (isinstance(task_specs, list) and task_specs):
            raise ValueError("Eval fixture must contain a non-empty `tasks` list.")

        # Options that are identical for every task in the fixture.
        shared_options = {
            "default_budget": default_budget,
            "default_profile": default_profile,
            "execute_provider": execute_provider,
            "execute_model": execute_model,
            "execute_base_url": execute_base_url,
        }
        task_reports = []
        for spec in task_specs:
            task_reports.append(self._run_task(spec, **shared_options))

        report = {
            "id": uuid4().hex[:12],
            "created_at": utc_now(),
            "fixture": str(path),
            "name": fixture.get("name", path.stem),
            "execution": {
                "enabled": bool(execute_provider),
                "provider": execute_provider,
                "model": execute_model,
            },
            "tasks": task_reports,
            "summary": summarize_reports(task_reports),
        }
        if save:
            self.save_report(report)
        return report

    def save_report(self, report: dict[str, Any]) -> Path:
        """Write *report* to ``<evals_dir>/<report id>.json`` and return that path."""
        evals_dir = self.workspace.evals_dir
        evals_dir.mkdir(parents=True, exist_ok=True)
        destination = evals_dir / f"{report['id']}.json"
        Workspace.write_json(destination, report)
        return destination

    def list_reports(self) -> list[dict[str, Any]]:
        """Return a summary row per saved report, ordered by ``created_at`` descending."""
        evals_dir = self.workspace.evals_dir
        evals_dir.mkdir(parents=True, exist_ok=True)

        def summarize(report_path: Path) -> dict[str, Any]:
            # Missing fields fall back to neutral defaults instead of failing.
            report = Workspace.read_json(report_path)
            summary = report.get("summary", {})
            passed = summary.get("passed_checks", 0)
            total = summary.get("total_checks", 0)
            return {
                "id": report.get("id", report_path.stem),
                "created_at": report.get("created_at", ""),
                "name": report.get("name", ""),
                "fixture": report.get("fixture", ""),
                "task_count": summary.get("task_count", 0),
                "average_savings_percent": summary.get("average_savings_percent", 0),
                "checks": f"{passed}/{total}",
                "ok": summary.get("ok", False),
            }

        rows = [summarize(report_path) for report_path in sorted(evals_dir.glob("*.json"))]
        rows.sort(key=lambda row: row["created_at"], reverse=True)
        return rows

    def get_report(self, report_id: str) -> dict[str, Any]:
        """Load the saved report named *report_id*.

        Raises:
            KeyError: If no report file exists for that id.
        """
        location = self.workspace.evals_dir / f"{report_id}.json"
        if location.exists():
            return Workspace.read_json(location)
        raise KeyError(f"Unknown eval report: {report_id}")

    def diff_reports(self, before_id: str, after_id: str) -> dict[str, Any]:
        """Load two saved reports by id and diff them with the module-level ``diff_reports``."""
        return diff_reports(self.get_report(before_id), self.get_report(after_id))

    def _run_task(
        self,
        task: dict[str, Any],
        default_budget: int | None,
        default_profile: str,
        execute_provider: str | None,
        execute_model: str | None,
        execute_base_url: str | None,
    ) -> dict[str, Any]:
        """Evaluate one fixture task and return its report entry.

        Raises:
            ValueError: If the task has no ``request`` value.
        """
        request = task.get("request")
        if not request:
            raise ValueError("Eval task is missing required field: request")

        request_text = str(request)
        comparison = self.context.compare(
            request_text,
            total_budget=task.get("budget", default_budget),
            profile=str(task.get("profile", default_profile)),
        )
        execution = execute_task(comparison, execute_provider, execute_model, execute_base_url)
        checks = evaluate_checks(comparison, task, execution)

        kernel = comparison["kernel"]
        baseline = comparison["baseline"]
        entry = {
            # The request itself doubles as the id when the task does not name one.
            "id": str(task.get("id", request)),
            "request": request_text,
            "profile": comparison["profile"],
            "budget": comparison["budget"],
            "kernel": {
                "estimated_tokens": kernel["estimated_tokens"],
                "selected_memory": kernel["selected_memory"],
                "selected_skills": kernel["selected_skills"],
            },
            "baseline": {
                "estimated_tokens": baseline["estimated_tokens"],
                "loaded_memory": baseline["loaded_memory"],
                "loaded_skills": baseline["loaded_skills"],
            },
            "savings": comparison["savings"],
            "checks": checks,
        }
        # The execution section is only present when something was executed or blocked.
        if execution:
            entry["execution"] = execution
        return entry
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def execute_task(
    comparison: dict[str, Any],
    provider_name: str | None,
    model: str | None,
    base_url: str | None,
) -> dict[str, Any] | None:
    """Run the kernel packet of *comparison* through a provider, if one is named.

    Returns:
        ``None`` when *provider_name* is empty. Otherwise a dict describing the
        run: a ``blocked`` entry with zero token counts when the preflight
        verification fails, or the provider response with its token counts and
        the combined verification result.
    """
    if not provider_name:
        return None

    packet = comparison["kernel"]["packet"]
    preflight = verify_preflight(packet)

    if preflight["ok"]:
        provider = get_provider(provider_name, model=model, base_url=base_url)
        reply = provider.run(packet)
        reply_check = verify_response(reply.text)
        return {
            "provider": provider.name,
            # Prefer the model the provider reports; fall back to the requested one.
            "model": getattr(provider, "model", model),
            "blocked": False,
            "response": reply.text,
            "input_tokens": reply.input_tokens,
            "output_tokens": reply.output_tokens,
            "total_tokens": reply.input_tokens + reply.output_tokens,
            "verifier": combine_verifications("eval_execution", preflight, reply_check),
        }

    # Preflight failed: report the block without contacting the provider.
    return {
        "provider": provider_name,
        "model": model,
        "blocked": True,
        "block_reason": "preflight_failed",
        "response": "",
        "input_tokens": 0,
        "output_tokens": 0,
        "total_tokens": 0,
        "verifier": preflight,
    }
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def evaluate_checks(
    comparison: dict[str, Any],
    task: dict[str, Any],
    execution: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Score the expectations declared on *task* against *comparison*.

    Expected skills are matched by contract id, expected memory and response
    terms by case-insensitive substring. Response terms are only checked when
    *execution* is truthy, and the savings threshold only when the task
    declares ``minimum_savings_percent``.

    Returns:
        A dict with the ``passed`` and ``total`` counts, an overall ``ok`` flag
        and the individual check ``items``.
    """
    packet = comparison["kernel"]["packet"]
    chosen_skill_ids = [entry["contract"]["id"] for entry in packet["skills"]]
    memory_blob = " ".join(entry["record"]["text"].lower() for entry in packet["memory"])

    items: list[dict[str, Any]] = [
        {"name": f"expected_skill:{skill_id}", "passed": skill_id in chosen_skill_ids}
        for skill_id in task.get("expected_skills", [])
    ]
    items.extend(
        {"name": f"expected_memory_term:{term}", "passed": str(term).lower() in memory_blob}
        for term in task.get("expected_memory_terms", [])
    )

    if execution:
        response_blob = execution.get("response", "").lower()
        items.extend(
            {"name": f"expected_response_term:{term}", "passed": str(term).lower() in response_blob}
            for term in task.get("expected_response_terms", [])
        )

    threshold = task.get("minimum_savings_percent")
    if threshold is not None:
        meets_threshold = comparison["savings"]["percent"] >= float(threshold)
        items.append({"name": f"minimum_savings_percent:{threshold}", "passed": meets_threshold})

    passed_count = len([item for item in items if item["passed"]])
    total_count = len(items)
    return {
        "passed": passed_count,
        "total": total_count,
        "ok": passed_count == total_count,
        "items": items,
    }
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def summarize_reports(reports: list[dict[str, Any]]) -> dict[str, Any]:
    """Aggregate per-task eval reports into one summary dict.

    Token and check counts are summed across *reports*. Total savings are
    clamped at zero, and both percentage figures are ``0.0`` when there is
    nothing to divide by. ``ok`` is true when every check passed.
    """
    task_count = len(reports)
    kernel_tokens = sum([entry["kernel"]["estimated_tokens"] for entry in reports])
    baseline_tokens = sum([entry["baseline"]["estimated_tokens"] for entry in reports])
    saved_tokens = max(0, baseline_tokens - kernel_tokens)
    checks_total = sum([entry["checks"]["total"] for entry in reports])
    checks_passed = sum([entry["checks"]["passed"] for entry in reports])

    # Tasks without an execution section contribute nothing to these figures.
    execution_tokens = sum([entry.get("execution", {}).get("total_tokens", 0) for entry in reports])
    executed = len(
        [entry for entry in reports if entry.get("execution") and not entry["execution"].get("blocked")]
    )
    blocked = len([entry for entry in reports if entry.get("execution", {}).get("blocked")])

    if reports:
        mean_savings = sum([entry["savings"]["percent"] for entry in reports]) / task_count
    else:
        mean_savings = 0.0

    if baseline_tokens:
        overall_percent = round((saved_tokens / baseline_tokens) * 100, 2)
    else:
        overall_percent = 0.0

    return {
        "task_count": task_count,
        "total_kernel_tokens": kernel_tokens,
        "total_baseline_tokens": baseline_tokens,
        "total_savings_tokens": saved_tokens,
        "total_savings_percent": overall_percent,
        "average_savings_percent": round(mean_savings, 2),
        "passed_checks": checks_passed,
        "total_checks": checks_total,
        "executed_tasks": executed,
        "blocked_tasks": blocked,
        "total_execution_tokens": execution_tokens,
        "ok": checks_passed == checks_total,
    }
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def diff_reports(before: dict[str, Any], after: dict[str, Any]) -> dict[str, Any]:
    """Compare two eval reports and flag regressions.

    A task present in both reports counts as a regression when its kernel
    token estimate grew by more than ``REGRESSION_TOKEN_TOLERANCE``, its
    savings percentage dropped, or it passes fewer checks. The overall ``ok``
    flag also requires the cost diff to be ok.
    """
    summary_before = before.get("summary", {})
    summary_after = after.get("summary", {})
    task_diffs = diff_tasks(before.get("tasks", []), after.get("tasks", []))
    cost_diff = diff_cost_reports(build_eval_cost_report(before), build_eval_cost_report(after))

    def is_regression(item: dict[str, Any]) -> bool:
        # Added and removed tasks carry no deltas and are never regressions here.
        if item["status"] != "changed":
            return False
        return (
            item["kernel_token_delta"] > REGRESSION_TOKEN_TOLERANCE
            or item["savings_percent_delta"] < 0
            or item["passed_check_delta"] < 0
        )

    def change(key: str) -> int | float:
        return delta(summary_before, summary_after, key)

    def identity(report: dict[str, Any]) -> dict[str, Any]:
        return {
            "id": report.get("id"),
            "created_at": report.get("created_at"),
            "name": report.get("name"),
        }

    regressions = [item for item in task_diffs if is_regression(item)]
    return {
        "before": identity(before),
        "after": identity(after),
        "summary_delta": {
            "kernel_tokens": change("total_kernel_tokens"),
            "baseline_tokens": change("total_baseline_tokens"),
            "savings_tokens": change("total_savings_tokens"),
            "savings_percent": round(change("total_savings_percent"), 2),
            "passed_checks": change("passed_checks"),
            "total_checks": change("total_checks"),
        },
        "cost_diff": cost_diff,
        "cost_regressions": cost_diff["regressions"],
        "tasks": task_diffs,
        "regressions": regressions,
        "ok": not regressions and cost_diff["ok"],
    }
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def diff_tasks(before_tasks: list[dict[str, Any]], after_tasks: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Diff two task-report lists by task id.

    Returns one entry per id, in sorted id order. Ids found on only one side
    are marked ``added`` or ``removed``; ids found on both sides are marked
    ``changed`` and carry the token, savings and check deltas plus a snapshot
    of each side.
    """
    old_index = {entry["id"]: entry for entry in before_tasks}
    new_index = {entry["id"]: entry for entry in after_tasks}

    diffs: list[dict[str, Any]] = []
    for task_id in sorted(old_index.keys() | new_index.keys()):
        old = old_index.get(task_id)
        new = new_index.get(task_id)
        if old is None:
            diffs.append({"id": task_id, "status": "added"})
        elif new is None:
            diffs.append({"id": task_id, "status": "removed"})
        else:
            kernel_delta = new["kernel"]["estimated_tokens"] - old["kernel"]["estimated_tokens"]
            baseline_delta = new["baseline"]["estimated_tokens"] - old["baseline"]["estimated_tokens"]
            savings_delta = round(new["savings"]["percent"] - old["savings"]["percent"], 2)
            passed_delta = new["checks"]["passed"] - old["checks"]["passed"]
            total_delta = new["checks"]["total"] - old["checks"]["total"]
            diffs.append(
                {
                    "id": task_id,
                    "status": "changed",
                    "kernel_token_delta": kernel_delta,
                    "baseline_token_delta": baseline_delta,
                    "savings_percent_delta": savings_delta,
                    "passed_check_delta": passed_delta,
                    "total_check_delta": total_delta,
                    "before": task_snapshot(old),
                    "after": task_snapshot(new),
                }
            )
    return diffs
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def task_snapshot(task: dict[str, Any]) -> dict[str, Any]:
    """Reduce a task report to its headline numbers, with checks as ``passed/total``."""
    checks = task["checks"]
    snapshot: dict[str, Any] = {}
    snapshot["kernel_tokens"] = task["kernel"]["estimated_tokens"]
    snapshot["baseline_tokens"] = task["baseline"]["estimated_tokens"]
    snapshot["savings_percent"] = task["savings"]["percent"]
    snapshot["checks"] = f"{checks['passed']}/{checks['total']}"
    return snapshot
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
def delta(before: dict[str, Any], after: dict[str, Any], key: str) -> int | float:
    """Return ``after[key] - before[key]``, treating a missing key as ``0``."""
    new_value = after.get(key, 0)
    old_value = before.get(key, 0)
    return new_value - old_value
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from .memory import MemoryStore
|
|
7
|
+
from .storage import Workspace
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def global_workspace(root: Path | None = None) -> Workspace:
    """Return an initialized workspace for global memory.

    Uses *root* when given, otherwise ``~/.context-kernel/global``.
    """
    location = root if root else Path.home() / ".context-kernel" / "global"
    shared = Workspace(location)
    shared.init()
    return shared
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def push_global_memories(
    workspace: Workspace,
    *,
    kind: str | None = None,
    namespace: str | None = None,
    tag: str | None = None,
    dry_run: bool = False,
    global_root: Path | None = None,
) -> dict[str, Any]:
    """Copy memories from *workspace* into the global workspace.

    Each copied record gets the ``global`` tag plus namespace and source tags
    derived from the project directory name and root path. With *dry_run* the
    records are only previewed: nothing is added to the global store and
    ``count`` is reported as ``0``.

    Returns:
        A summary dict with the direction, namespace, source and target paths,
        the counts and the pushed (or previewed) records.
    """
    local_store = MemoryStore(workspace)
    global_ws = global_workspace(global_root)
    global_store = MemoryStore(global_ws)

    project_name = workspace.root.name
    # The namespace defaults to the project directory name.
    sync_namespace = normalize_namespace(namespace or project_name)
    provenance_tags = {
        "global",
        f"namespace:{sync_namespace}",
        f"source:{project_name}",
        f"source_project:{project_name}",
        f"source_root:{workspace.root.as_posix()}",
    }

    pushed = []
    for record in filter_records(local_store.all(kind=kind), tag=tag):
        merged_tags = sorted(provenance_tags.union(record.tags))
        if not dry_run:
            entry = global_store.add(record.kind, record.text, tags=merged_tags).to_dict()
        else:
            entry = record.to_dict()
            entry["tags"] = merged_tags
        pushed.append(entry)

    candidate_count = len(pushed)
    return {
        "direction": "push",
        "dry_run": dry_run,
        "namespace": sync_namespace,
        "source": str(workspace.root),
        "target": str(global_ws.root),
        "count": 0 if dry_run else candidate_count,
        "candidate_count": candidate_count,
        "records": pushed,
    }
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def pull_global_memories(
    workspace: Workspace,
    *,
    kind: str | None = None,
    namespace: str | None = None,
    source_project: str | None = None,
    tag: str | None = None,
    limit: int | None = None,
    dry_run: bool = False,
    global_root: Path | None = None,
) -> dict[str, Any]:
    """Copy memories from the global workspace into *workspace*.

    Records are filtered by *namespace*, *source_project* and *tag*, then
    truncated to *limit* (a negative limit is treated as zero). Each copied
    record gets the ``global`` and ``imported_global`` tags. With *dry_run*
    nothing is added locally and ``count`` is reported as ``0``.

    Returns:
        A summary dict with the direction, filters, source and target paths,
        the counts and the pulled (or previewed) records.
    """
    global_ws = global_workspace(global_root)
    global_store = MemoryStore(global_ws)
    local_store = MemoryStore(workspace)

    candidates = filter_records(
        global_store.all(kind=kind),
        namespace=namespace,
        source_project=source_project,
        tag=tag,
    )
    if limit is not None:
        candidates = candidates[: max(0, limit)]

    import_tags = {"global", "imported_global"}
    pulled = []
    for record in candidates:
        merged_tags = sorted(import_tags.union(record.tags))
        if not dry_run:
            entry = local_store.add(record.kind, record.text, tags=merged_tags).to_dict()
        else:
            entry = record.to_dict()
            entry["tags"] = merged_tags
        pulled.append(entry)

    candidate_count = len(pulled)
    return {
        "direction": "pull",
        "dry_run": dry_run,
        "namespace": normalize_namespace(namespace) if namespace else None,
        "source_project": source_project,
        "source": str(global_ws.root),
        "target": str(workspace.root),
        "count": 0 if dry_run else candidate_count,
        "candidate_count": candidate_count,
        "records": pulled,
    }
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def filter_records(
    records: list[Any],
    *,
    namespace: str | None = None,
    source_project: str | None = None,
    tag: str | None = None,
) -> list[Any]:
    """Keep the records whose tags satisfy every filter that was given.

    *namespace* is normalized and matched as a ``namespace:<name>`` tag,
    *source_project* matches either a ``source_project:`` or a ``source:``
    tag, and *tag* must be present verbatim. When no filter is given the
    input list itself is returned.
    """
    predicates = []
    if namespace:
        namespace_tag = f"namespace:{normalize_namespace(namespace)}"
        predicates.append(lambda record: namespace_tag in record.tags)
    if source_project:
        source_tags = {f"source_project:{source_project}", f"source:{source_project}"}
        predicates.append(lambda record: source_tags.intersection(record.tags))
    if tag:
        predicates.append(lambda record: tag in record.tags)

    if not predicates:
        return records
    return [record for record in records if all(check(record) for check in predicates)]
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def normalize_namespace(value: str) -> str:
    """Lowercase *value* and join its whitespace-separated words with ``-``.

    Returns ``"default"`` when nothing but whitespace remains.
    """
    # split() with no arguments already ignores leading and trailing whitespace.
    words = str(value).split()
    slug = "-".join(words).lower()
    if slug:
        return slug
    return "default"
|