@0dai-dev/cli 4.3.5 → 4.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -11
- package/bin/0dai.js +214 -40
- package/lib/ai/manifest/mcp-exposure-contract.json +121 -0
- package/lib/ai/meta/manifest/mcp-tool-tiers.json +435 -0
- package/lib/ai/registry/mcp-catalog.json +98 -0
- package/lib/commands/auth.js +55 -1
- package/lib/commands/compliance.js +1 -1
- package/lib/commands/detect.js +10 -4
- package/lib/commands/doctor.js +545 -26
- package/lib/commands/experience.js +40 -5
- package/lib/commands/export.js +73 -0
- package/lib/commands/feedback.js +157 -15
- package/lib/commands/gh.js +26 -0
- package/lib/commands/graph.js +9 -4
- package/lib/commands/heatmap.js +1 -1
- package/lib/commands/init.js +222 -30
- package/lib/commands/mcp.js +129 -21
- package/lib/commands/models.js +138 -41
- package/lib/commands/provider.js +30 -59
- package/lib/commands/quota.js +1 -1
- package/lib/commands/receipt.js +1 -1
- package/lib/commands/run.js +18 -7
- package/lib/commands/runner.js +31 -1
- package/lib/commands/status.js +44 -11
- package/lib/commands/swarm.js +130 -12
- package/lib/commands/trust.js +286 -0
- package/lib/commands/update.js +184 -38
- package/lib/commands/usage.js +1 -1
- package/lib/commands/validate.js +32 -3
- package/lib/commands/vault.js +46 -9
- package/lib/python/__init__.py +0 -0
- package/lib/python/agent_quotas.py +525 -0
- package/lib/python/anomaly_alert.py +397 -0
- package/lib/python/anti_pattern_detector.py +799 -0
- package/lib/python/auth.py +443 -0
- package/lib/python/capi_profile_guard.py +477 -0
- package/lib/python/compliance_report.py +581 -0
- package/lib/python/drift_detector.py +388 -0
- package/lib/python/experience_pipeline.py +1130 -0
- package/lib/python/graph.py +19 -0
- package/lib/python/graph_core.py +293 -0
- package/lib/python/graph_io.py +179 -0
- package/lib/python/graph_legacy.py +2052 -0
- package/lib/python/graph_legacy_helpers.py +221 -0
- package/lib/python/graph_outcomes_core.py +85 -0
- package/lib/python/graph_queries.py +171 -0
- package/lib/python/graph_slice.py +198 -0
- package/lib/python/graph_slicer.py +576 -0
- package/lib/python/graph_slicer_cli.py +60 -0
- package/lib/python/graph_validation.py +64 -0
- package/lib/python/heatmap.py +934 -0
- package/lib/python/json_utils.py +193 -0
- package/lib/python/mcp_exposure_check.py +247 -0
- package/lib/python/model_router.py +1434 -0
- package/lib/python/project_manager.py +621 -0
- package/lib/python/provider_profiles.py +1618 -0
- package/lib/python/provider_registry.py +1211 -0
- package/lib/python/provider_registry_cli.py +125 -0
- package/lib/python/receipt_png.py +727 -0
- package/lib/python/structural_memory.py +325 -0
- package/lib/python/swarm_cost.py +177 -0
- package/lib/python/usage_ledger.py +569 -0
- package/lib/scripts/mcp_tier_config.py +240 -0
- package/lib/shared.js +97 -14
- package/lib/tui/index.mjs +35174 -0
- package/lib/utils/activation_telemetry.js +230 -11
- package/lib/utils/constants.js +7 -1
- package/lib/utils/export-bundler.js +285 -0
- package/lib/utils/identity.js +198 -1
- package/lib/utils/mcp-auth.js +81 -15
- package/lib/utils/plan.js +1 -1
- package/lib/vault/index.js +19 -3
- package/lib/vault/storage.js +21 -2
- package/lib/wizard.js +5 -2
- package/package.json +9 -3
- package/scripts/build-python-bundle.js +106 -0
- package/scripts/build-tui.js +14 -1
- package/scripts/harvest_experience.py +523 -0
- package/scripts/postinstall.js +15 -9
|
@@ -0,0 +1,1130 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Unified structured experience event pipeline for local and server storage."""
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import datetime as dt
|
|
7
|
+
import hashlib
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
import pathlib
|
|
11
|
+
import re
|
|
12
|
+
import urllib.error
|
|
13
|
+
import urllib.request
|
|
14
|
+
|
|
15
|
+
try: # PyYAML is optional — only the sync contribution-gate reads project.yaml.
|
|
16
|
+
import yaml
|
|
17
|
+
except ImportError: # npm-installed users may lack PyYAML; degrade, don't crash (#4363).
|
|
18
|
+
yaml = None
|
|
19
|
+
|
|
20
|
+
import auth
|
|
21
|
+
import project_manager as pm
|
|
22
|
+
from json_utils import append_jsonl, load_json, load_jsonl, save_json
|
|
23
|
+
|
|
24
|
+
# Remote API base URL; the ODAI_API_URL environment variable overrides the default.
API_BASE = os.environ.get("ODAI_API_URL", "https://api.0dai.dev")
# Root directory (under the user's home) used by _server_event_path.
SERVER_EXPERIENCE_DIR = pathlib.Path.home().joinpath(".0dai", "db", "experience")
# Default age, in days, after which rotate_archives moves a daily event file.
ARCHIVE_AFTER_DAYS = 90
SYNC_BATCH_LIMIT = 200
SYNC_RETENTION_IDS = 200
# Event types kept as-is by _normalize_event; anything else becomes "task_completed".
SUPPORTED_EVENT_TYPES = {
    "task_completed", "task_failed",
    "session_saved", "session_resumed",
    "graph_synced", "decision_made",
    "config_generated", "doctor_run",
}
# Task results counted as successes / failures by the statistics helpers.
SUCCESS_RESULTS = {"success", "partial"}
FAIL_RESULTS = {"failure", "timeout", "stuck"}
# Substrings that make _looks_like_code treat a piece of text as source code.
CODE_PATTERNS = (
    "```",
    "def ", "class ", "function ", "const ",
    "import ", "export ", "return ",
    "{", "}", ";",
)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _detect_target(target: str | pathlib.Path = ".") -> pathlib.Path:
    """Return ``target`` (default: current directory) as a resolved ``Path``."""
    as_path = pathlib.Path(target)
    return as_path.resolve()
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _now_dt() -> dt.datetime:
    """Return the current time as an aware UTC datetime with microseconds zeroed."""
    current = dt.datetime.now(tz=dt.timezone.utc)
    return current.replace(microsecond=0)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _now() -> str:
    """Return the current UTC time as ISO-8601 text, with ``+00:00`` written as ``Z``."""
    stamp = _now_dt().isoformat()
    return stamp.replace("+00:00", "Z")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _dt_from_iso(value: str | None) -> dt.datetime | None:
    """Parse ISO-8601 text into an aware UTC datetime.

    A ``Z`` in the text is rewritten to ``+00:00`` before parsing, and a value
    without timezone information is taken to be UTC. Returns ``None`` for an
    empty value or text that ``datetime.fromisoformat`` rejects.
    """
    if value:
        try:
            moment = dt.datetime.fromisoformat(str(value).replace("Z", "+00:00"))
        except ValueError:
            return None
        if moment.tzinfo is None:
            # Naive timestamps are assumed to already be in UTC.
            moment = moment.replace(tzinfo=dt.timezone.utc)
        return moment.astimezone(dt.timezone.utc)
    return None
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _days_ago(value: str | None) -> int | None:
    """Return how many whole days ago ``value`` was, never less than 0.

    Returns ``None`` when ``value`` cannot be parsed by ``_dt_from_iso``.
    """
    moment = _dt_from_iso(value)
    if moment is None:
        return None
    elapsed = _now_dt() - moment
    whole_days = int(elapsed.total_seconds() // 86400)
    # Timestamps in the future are clamped to zero days.
    return max(0, whole_days)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _safe_div(num: float, den: float) -> float:
    """Return ``num / den`` rounded to 4 decimals, or ``0.0`` when ``den`` is falsy."""
    return round(num / den, 4) if den else 0.0
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _looks_like_code(text: str) -> bool:
    """Report whether ``text`` is multi-line or contains any ``CODE_PATTERNS`` marker.

    Marker matching is done against the lower-cased text.
    """
    haystack = text.lower()
    if "\n" in text:
        return True
    for marker in CODE_PATTERNS:
        if marker in haystack:
            return True
    return False
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def sanitize_text(value: object, *, limit: int = 180) -> str:
    """Collapse whitespace in ``value`` and truncate it to ``limit`` characters.

    Returns ``""`` when the value is falsy/blank or when ``_looks_like_code``
    flags the collapsed text.
    """
    raw = str(value or "")
    collapsed = " ".join(raw.strip().split())
    if collapsed and not _looks_like_code(collapsed):
        return collapsed[:limit]
    return ""
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _sanitize_result(value: object) -> str:
    """Map a free-form result/status value onto the canonical result vocabulary.

    Canonical values pass through; ``done``/``ok``/``passed`` become
    ``success``; ``failed``/``error`` become ``failure``; everything else is
    ``unknown``.
    """
    token = sanitize_text(value, limit=24).lower()
    if token in ("success", "failure", "partial", "timeout", "stuck"):
        return token
    aliases = {
        "done": "success",
        "ok": "success",
        "passed": "success",
        "failed": "failure",
        "error": "failure",
    }
    return aliases.get(token, "unknown")
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _project_id(target: pathlib.Path) -> str:
    """Return the ``project_id`` reported by ``pm._identity(target)``, or ``""``."""
    identity = pm._identity(target)
    return str(identity.get("project_id") or "")
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _stack(target: pathlib.Path) -> str:
    """Return the project's stack name.

    Prefers ``stack`` from ``ai/manifest/discovery.json``, then the value from
    ``pm._identity(target)``, and finally ``"unknown"``.
    """
    discovery = load_json(target / "ai" / "manifest" / "discovery.json")
    detected = discovery.get("stack")
    if not detected:
        # Only consult the project identity when discovery has no stack.
        detected = pm._identity(target).get("stack")
    return str(detected or "unknown")
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _events_root(target: pathlib.Path) -> pathlib.Path:
    """Return ``<target>/ai/experience``, the base directory for experience data."""
    return target.joinpath("ai", "experience")
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _events_dir(target: pathlib.Path) -> pathlib.Path:
    """Return the ``events`` directory beneath ``_events_root(target)``."""
    return _events_root(target).joinpath("events")
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _archive_dir(target: pathlib.Path) -> pathlib.Path:
    """Return the ``archive`` directory beneath ``_events_root(target)``."""
    return _events_root(target).joinpath("archive")
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _sync_state_path(target: pathlib.Path) -> pathlib.Path:
    """Return the path of ``sync_state.json`` beneath ``_events_root(target)``."""
    return _events_root(target).joinpath("sync_state.json")
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _event_path_for(target: pathlib.Path, timestamp: str) -> pathlib.Path:
    """Return the daily ``YYYY-MM-DD.jsonl`` file in the events dir for ``timestamp``.

    Falls back to the current UTC date when ``timestamp`` cannot be parsed.
    """
    moment = _dt_from_iso(timestamp)
    if not moment:
        moment = _now_dt()
    filename = f"{moment.date().isoformat()}.jsonl"
    return _events_dir(target) / filename
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _server_event_path(project_id: str, timestamp: str) -> pathlib.Path:
    """Return ``SERVER_EXPERIENCE_DIR/<project_id>/YYYY-MM-DD.jsonl`` for ``timestamp``.

    Falls back to the current UTC date when ``timestamp`` cannot be parsed.
    """
    moment = _dt_from_iso(timestamp)
    if not moment:
        moment = _now_dt()
    filename = f"{moment.date().isoformat()}.jsonl"
    return SERVER_EXPERIENCE_DIR / project_id / filename
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _iter_event_files(target: pathlib.Path, *, include_archive: bool = False) -> list[pathlib.Path]:
    """Return the sorted ``*.jsonl`` files in the events dir.

    With ``include_archive=True`` the archive directory's files are included
    in the same sorted list.
    """
    found = list(_events_dir(target).glob("*.jsonl"))
    if include_archive:
        found += _archive_dir(target).glob("*.jsonl")
    found.sort()
    return found
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def rotate_archives(target: pathlib.Path, *, max_age_days: int = ARCHIVE_AFTER_DAYS) -> int:
    """Move daily event files older than ``max_age_days`` into the archive dir.

    The archive directory is created if missing. Files whose stem is not an
    ISO date are ignored. When the archive already holds a file of the same
    name, the old file's text is appended to it and the source is deleted;
    otherwise the file is moved. Returns the number of files archived.
    """
    live_dir = _events_dir(target)
    cold_dir = _archive_dir(target)
    cold_dir.mkdir(parents=True, exist_ok=True)
    oldest_kept = _now_dt().date() - dt.timedelta(days=max_age_days)
    archived = 0
    for source in sorted(live_dir.glob("*.jsonl")):
        try:
            stamped = dt.date.fromisoformat(source.stem)
        except ValueError:
            continue  # not a date-named daily file
        if stamped < oldest_kept:
            destination = cold_dir / source.name
            if not destination.exists():
                os.replace(str(source), str(destination))
            else:
                with destination.open("a", encoding="utf-8") as sink:
                    sink.write(source.read_text(encoding="utf-8"))
                source.unlink(missing_ok=True)
            archived += 1
    return archived
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _load_sync_state(target: pathlib.Path) -> dict:
    """Load the sync-state JSON document for ``target`` via ``load_json``."""
    state_file = _sync_state_path(target)
    return load_json(state_file)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _save_sync_state(target: pathlib.Path, data: dict) -> None:
    """Write ``data`` to the sync-state JSON file for ``target`` via ``save_json``."""
    state_file = _sync_state_path(target)
    save_json(state_file, data)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _event_identity(event: dict) -> tuple[str, str]:
    """Return ``(timestamp, event_id)`` as strings; missing or falsy values become ``""``."""
    stamp = event.get("timestamp") or ""
    ident = event.get("event_id") or ""
    return str(stamp), str(ident)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _event_sort_key(event: dict) -> tuple[str, str]:
    """Return the sort key for an event: its ``_event_identity`` tuple."""
    sort_key = _event_identity(event)
    return sort_key
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _build_event_id(event_type: str, timestamp: str, agent: str, session_id: str, goal: str) -> str:
    """Derive a deterministic event id from the identifying fields.

    The fields are serialised as key-sorted JSON, hashed with SHA-256, and the
    first 12 hex digits are prefixed with ``exp_``.
    """
    fields = dict(
        event_type=event_type,
        timestamp=timestamp,
        agent=agent,
        session_id=session_id,
        goal=goal,
    )
    seed = json.dumps(fields, sort_keys=True, ensure_ascii=False)
    digest = hashlib.sha256(seed.encode("utf-8")).hexdigest()
    return f"exp_{digest[:12]}"
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def _normalize_task_type(raw: object, goal: str = "") -> str:
    """Classify a task as feat/fix/refactor/test/docs/review.

    A sanitised ``raw`` value that is already one of the six labels is
    returned as-is. Otherwise the label is guessed by substring-matching
    keywords against ``raw`` plus ``goal``, checked in the order test, docs,
    review, refactor, fix. The default is ``"feat"``.
    """
    declared = sanitize_text(raw, limit=32).lower().replace("_", "-")
    if declared in ("feat", "fix", "refactor", "test", "docs", "review"):
        return declared
    haystack = f"{declared} {goal}".lower()
    # Order matters: the first label with a matching keyword wins.
    keyword_table = (
        ("test", ("test", "spec", "coverage")),
        ("docs", ("doc", "readme", "comment")),
        ("review", ("review", "audit", "analysis")),
        ("refactor", ("refactor", "cleanup", "rename")),
        ("fix", ("bug", "fix", "hotfix", "regression")),
    )
    for label, keywords in keyword_table:
        if any(word in haystack for word in keywords):
            return label
    return "feat"
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _read_task_counts(target: pathlib.Path) -> tuple[str, str]:
    """Return ``(branch, short_commit_hash)`` from git, run with ``target`` as cwd.

    Despite its name this function reads git metadata, not task counts. Each
    value is ``""`` when its git command exits non-zero. Any exception stops
    the probe and leaves the remaining values empty (best effort).
    """
    branch = ""
    commit = ""
    try:
        import subprocess

        def _git(*args: str) -> str:
            proc = subprocess.run(
                ["git", *args],
                cwd=target,
                capture_output=True,
                text=True,
                check=False,
                timeout=10,
            )
            return proc.stdout.strip() if proc.returncode == 0 else ""

        branch = _git("branch", "--show-current")
        commit = _git("rev-parse", "--short", "HEAD")
    except Exception:  # noqa: BLE001 — git metadata probe failed
        pass
    return branch, commit
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def _active_session_id(target: pathlib.Path) -> str:
    """Return the id stored in ``ai/sessions/active.json``, or ``""``.

    ``session_id`` is preferred over ``id``.
    """
    active = load_json(target / "ai" / "sessions" / "active.json")
    identifier = active.get("session_id") or active.get("id")
    return str(identifier or "")
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def _quality_block(task: dict) -> dict:
    """Build the five boolean quality flags for a task dict.

    ``lint_clean`` and ``acceptance_criteria_met`` default to whether the
    tests passed. That comes from the explicit ``tests_passed`` key when
    present, otherwise from the task's ``result``/``status`` sanitising to
    ``"success"``. ``no_secrets`` and ``commit_message_valid`` default to
    True, ``review_needed`` to False.
    """
    if "tests_passed" in task:
        passed = bool(task.get("tests_passed"))
    else:
        outcome = _sanitize_result(task.get("result") or task.get("status"))
        passed = outcome == "success"
    defaults = (
        ("lint_clean", passed),
        ("no_secrets", True),
        ("commit_message_valid", True),
        ("acceptance_criteria_met", passed),
        ("review_needed", False),
    )
    return {flag: bool(task.get(flag, fallback)) for flag, fallback in defaults}
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def _normalize_event(target: pathlib.Path, payload: dict) -> dict:
    """Coerce a loosely structured payload into the canonical event record.

    Fields are looked up in the nested ``task``/``context``/``quality`` dicts
    first and then at the top level of ``payload``. Missing values are filled
    from the project on disk (git branch/commit, active session, project id,
    stack). Unsupported event types become ``"task_completed"``. An unknown
    result becomes ``"failure"`` for ``task_failed`` events and ``"success"``
    for ``task_completed`` events.

    Numeric fields are parsed leniently: a value that is not a finite number
    is recorded as 0 instead of raising, so one malformed field cannot abort
    event recording.

    Args:
        target: Project root used for on-disk lookups.
        payload: Raw event data; ``task``, ``context`` and ``quality`` are
            expected to be mappings when present.

    Returns:
        A new dict with ``event_id``, ``timestamp``, ``project_id``,
        ``session_id``, ``event_type``, ``agent``, ``model``, ``effort``,
        ``task``, ``context`` and ``quality`` keys.
    """

    def _number(*candidates: object) -> float:
        """Return the first truthy candidate as a finite float, else 0.0."""
        chosen = next((item for item in candidates if item), 0)
        try:
            number = float(chosen)
        except (TypeError, ValueError):
            return 0.0
        # NaN/inf cannot be converted by int() and are not valid JSON numbers.
        if number != number or abs(number) == float("inf"):
            return 0.0
        return number

    def _count(*candidates: object) -> int:
        """Return the first truthy candidate truncated to an int, else 0."""
        return int(_number(*candidates))

    timestamp = str(payload.get("timestamp") or _now())
    event_type = str(payload.get("event_type") or "").strip()
    if event_type not in SUPPORTED_EVENT_TYPES:
        event_type = "task_completed"
    branch, commit_hash = _read_task_counts(target)
    task = dict(payload.get("task") or {})
    context = dict(payload.get("context") or {})
    quality = dict(payload.get("quality") or {})
    goal = sanitize_text(
        task.get("goal") or payload.get("goal") or payload.get("summary") or payload.get("title") or "",
        limit=160,
    )
    task_result = _sanitize_result(task.get("result") or payload.get("result") or payload.get("status"))
    if task_result == "unknown":
        if event_type == "task_failed":
            task_result = "failure"
        elif event_type == "task_completed":
            task_result = "success"

    # Resolve shared values once: the session lookup reads a file from disk.
    raw_agent = payload.get("agent") or task.get("agent")
    session_id = str(payload.get("session_id") or _active_session_id(target))
    event_id = str(
        payload.get("event_id")
        or _build_event_id(event_type, timestamp, str(raw_agent or ""), session_id, goal)
    )
    # Explicit ``quality`` values win; task-derived flags are the fallback.
    task_quality = _quality_block(task)
    merged_quality = {flag: bool(quality.get(flag, fallback)) for flag, fallback in task_quality.items()}

    return {
        "event_id": event_id,
        "timestamp": timestamp,
        "project_id": str(payload.get("project_id") or _project_id(target)),
        "session_id": session_id,
        "event_type": event_type,
        "agent": str(raw_agent or "unknown"),
        "model": sanitize_text(payload.get("model") or task.get("model") or "unknown", limit=48) or "unknown",
        "effort": str(payload.get("effort") or task.get("effort") or "medium"),
        "task": {
            "task_id": str(task.get("task_id") or payload.get("task_id") or ""),
            "goal": goal,
            "task_type": _normalize_task_type(
                task.get("task_type") or task.get("type") or payload.get("task_type"), goal
            ),
            "result": task_result,
            "elapsed_seconds": _count(task.get("elapsed_seconds"), payload.get("elapsed_seconds")),
            "cost_usd": round(
                _number(task.get("cost_usd"), task.get("cost"), payload.get("cost_usd")) or 0, 4
            ),
            "tokens_in": _count(task.get("tokens_in"), payload.get("tokens_in")),
            "tokens_out": _count(task.get("tokens_out"), payload.get("tokens_out")),
        },
        "context": {
            "stack": sanitize_text(context.get("stack") or _stack(target), limit=48) or "unknown",
            "files_touched": _count(context.get("files_touched"), payload.get("files_touched")),
            "tests_passed": bool(context.get("tests_passed", task_result == "success")),
            "commit_hash": sanitize_text(context.get("commit_hash") or commit_hash, limit=16),
            "branch": sanitize_text(context.get("branch") or branch, limit=64),
            "graph_nodes_used": _count(context.get("graph_nodes_used")),
            "graph_edges_used": _count(context.get("graph_edges_used")),
        },
        "quality": merged_quality,
    }
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def record_event(target: pathlib.Path | str, payload: dict) -> dict:
    """Normalise ``payload`` and append it to the matching daily events file.

    Old event files are rotated into the archive first. Returns the
    normalised event that was written.
    """
    root = _detect_target(target)
    rotate_archives(root)
    normalized = _normalize_event(root, payload)
    stamp = str(normalized.get("timestamp") or _now())
    append_jsonl(_event_path_for(root, stamp), normalized)
    return normalized
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def build_swarm_event(target: pathlib.Path | str, task: dict, agent: str, elapsed: float, event_type: str | None = None) -> dict:
    """Build an event payload from a swarm task record.

    The result is ``"timeout"`` when ``agent_error_kind`` is ``timeout``,
    ``"failure"`` when the status is ``failed`` or an ``error`` is set, and
    ``"success"`` otherwise. An explicit ``event_type`` is kept; otherwise it
    is ``task_failed`` for timeouts/failures and ``task_completed`` for
    successes.

    Args:
        target: Project root (resolved via ``_detect_target``).
        task: Swarm task dict; ``files`` may be a list or a single string and
            may also live under ``task["context"]["files"]``.
        agent: Name of the agent that ran the task.
        elapsed: Elapsed seconds; falls back to ``task["elapsed_seconds"]``
            when falsy.
        event_type: Optional override for the event type.

    Returns:
        An un-normalised payload dict with ``event_type``, ``session_id``,
        ``agent``, ``model``, ``effort``, ``task``, ``context`` and
        ``quality`` keys.
    """
    target_path = _detect_target(target)
    status = str(task.get("status") or "").lower()
    error_kind = str(task.get("agent_error_kind") or "").lower()
    if error_kind == "timeout":
        result, default_type = "timeout", "task_failed"
    elif status == "failed" or task.get("error"):
        result, default_type = "failure", "task_failed"
    else:
        result, default_type = "success", "task_completed"
    resolved_event_type = event_type or default_type

    # ``context`` may be present but None; ``or {}`` keeps the lookup safe.
    files = task.get("files") or (task.get("context") or {}).get("files") or []
    if isinstance(files, str):
        files = [files]
    context = {
        "stack": _stack(target_path),
        "files_touched": len([f for f in files if f]),
        "tests_passed": result == "success",
        "graph_nodes_used": 1 if task.get("graph_context_used") else 0,
        "graph_edges_used": 1 if task.get("graph_mutation_applied") else 0,
    }
    return {
        "event_type": resolved_event_type,
        "session_id": str(task.get("session_id") or _active_session_id(target_path)),
        "agent": agent,
        "model": str(task.get("model") or task.get("tier") or "unknown"),
        "effort": str(task.get("effort") or "medium"),
        "task": {
            "task_id": str(task.get("id") or ""),
            "goal": str(task.get("title") or task.get("goal") or ""),
            "task_type": str(task.get("type") or ""),
            "result": result,
            "elapsed_seconds": int(float(elapsed or task.get("elapsed_seconds") or 0) or 0),
            "cost_usd": float(task.get("cost") or 0),
            "tokens_in": int(float(task.get("tokens_in") or task.get("tokens") or 0) or 0),
            "tokens_out": int(float(task.get("tokens_out") or 0) or 0),
        },
        "context": context,
        "quality": _quality_block(task),
    }
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def build_session_event(target: pathlib.Path | str, event_type: str, payload: dict, *, agent: str | None = None) -> dict:
    """Build an event payload from a saved/resumed session document.

    The session is always reported as a successful ``review`` task with zero
    cost, time and token usage. ``acceptance_criteria_met`` is true when the
    session's plan has steps.
    NOTE(review): ``goal``, ``plan``, ``context`` and ``graph_context`` are
    assumed to be mappings when present — confirm against callers.
    """
    root = _detect_target(target)
    goal = payload.get("goal") or {}
    plan = payload.get("plan") or {}
    session_context = payload.get("context") or {}
    graph_context = payload.get("graph_context") or {}
    touched = session_context.get("files_modified") or []
    session_ref = payload.get("session_id") or payload.get("id")

    task_block = {
        "task_id": str(session_ref or ""),
        "goal": str(goal.get("refined") or goal.get("original") or ""),
        "task_type": "review",
        "result": "success",
        "elapsed_seconds": 0,
        "cost_usd": 0,
        "tokens_in": 0,
        "tokens_out": 0,
    }
    context_block = {
        "stack": _stack(root),
        "files_touched": len(touched) if isinstance(touched, list) else 0,
        "tests_passed": True,
        "graph_nodes_used": len(graph_context.get("relevant_nodes") or []),
        "graph_edges_used": len(graph_context.get("relevant_edges") or []),
    }
    quality_block = {
        "lint_clean": True,
        "no_secrets": True,
        "commit_message_valid": True,
        "acceptance_criteria_met": bool(plan.get("steps")),
        "review_needed": False,
    }
    return {
        "event_type": event_type,
        "session_id": str(session_ref or _active_session_id(root)),
        "agent": str(agent or payload.get("saved_by_agent") or payload.get("current_agent") or "unknown"),
        "model": str(payload.get("saved_by_model") or payload.get("current_model") or "unknown"),
        "effort": "medium",
        "task": task_block,
        "context": context_block,
        "quality": quality_block,
    }
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
def build_simple_event(
    target: pathlib.Path | str,
    event_type: str,
    *,
    agent: str = "unknown",
    model: str = "unknown",
    effort: str = "medium",
    goal: str = "",
    task_type: str = "feat",
    result: str = "success",
    files_touched: int = 0,
    graph_nodes_used: int = 0,
    graph_edges_used: int = 0,
    elapsed_seconds: int = 0,
    cost_usd: float = 0.0,
    tests_passed: bool = True,
) -> dict:
    """Build an event payload directly from keyword arguments.

    Token counts are always 0 and the task id is empty.
    ``acceptance_criteria_met`` is true when ``result`` is in
    ``SUCCESS_RESULTS``; the other quality flags are fixed.
    """
    task_block = {
        "task_id": "",
        "goal": goal,
        "task_type": task_type,
        "result": result,
        "elapsed_seconds": elapsed_seconds,
        "cost_usd": cost_usd,
        "tokens_in": 0,
        "tokens_out": 0,
    }
    context_block = {
        "stack": _stack(_detect_target(target)),
        "files_touched": files_touched,
        "tests_passed": tests_passed,
        "graph_nodes_used": graph_nodes_used,
        "graph_edges_used": graph_edges_used,
    }
    quality_block = {
        "lint_clean": True,
        "no_secrets": True,
        "commit_message_valid": True,
        "acceptance_criteria_met": result in SUCCESS_RESULTS,
        "review_needed": False,
    }
    return {
        "event_type": event_type,
        "agent": agent,
        "model": model,
        "effort": effort,
        "task": task_block,
        "context": context_block,
        "quality": quality_block,
    }
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def _matches_filters(
    event: dict,
    *,
    since: dt.datetime | None = None,
    until: dt.datetime | None = None,
    agent: str = "",
    event_type: str = "",
    limit_task_result: str = "",
) -> bool:
    """Report whether ``event`` passes every filter that is set.

    ``since``/``until`` bound the event timestamp inclusively, and an event
    without a parsable timestamp fails either bound. ``agent``,
    ``event_type`` and ``limit_task_result`` are compared case-insensitively.
    """

    def _differs(actual: object, wanted: str) -> bool:
        return str(actual or "").lower() != wanted.lower()

    stamp = _dt_from_iso(str(event.get("timestamp") or ""))
    if since and not (stamp and stamp >= since):
        return False
    if until and not (stamp and stamp <= until):
        return False
    if agent and _differs(event.get("agent"), agent):
        return False
    if event_type and _differs(event.get("event_type"), event_type):
        return False
    if limit_task_result and _differs((event.get("task") or {}).get("result"), limit_task_result):
        return False
    return True
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
def _parse_period(raw: str) -> tuple[dt.datetime | None, dt.datetime | None]:
    """Turn a period expression into a ``(since, until)`` pair.

    Accepted forms:
      * ``""`` or ``"all"`` — no lower bound.
      * ``"<N>d"`` — the last N days (at least 1), counted from now.
      * anything else — parsed as an ISO-8601 timestamp.

    The second element is always ``None``. An unparsable value yields
    ``(None, None)``.
    """
    text = str(raw or "").strip()
    keyword = text.lower()
    if not keyword or keyword == "all":
        return None, None
    if keyword.endswith("d"):
        try:
            days = max(1, int(keyword[:-1]))
        except ValueError:
            return None, None
        return _now_dt() - dt.timedelta(days=days), None
    # Parse the case-preserved text: lower-casing would turn a trailing "Z"
    # into "z", which the "Z" -> "+00:00" rewrite in _dt_from_iso misses.
    return _dt_from_iso(text), None
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
def load_events(
    target: pathlib.Path | str,
    *,
    since: str = "",
    until: str = "",
    agent: str = "",
    event_type: str = "",
    result: str = "",
    limit: int = 50,
    include_archive: bool = False,
) -> list[dict]:
    """Load recorded events, newest first, filtered and capped at ``limit``.

    Old event files are rotated into the archive before reading. ``since``
    is a period expression (see ``_parse_period``) and ``until`` an ISO
    timestamp. At least one event is returned when any match, even if
    ``limit`` is below 1.
    """
    root = _detect_target(target)
    rotate_archives(root)
    since_dt = _parse_period(since)[0]
    until_dt = _dt_from_iso(until)

    collected: list[dict] = []
    for source in _iter_event_files(root, include_archive=include_archive):
        collected += load_jsonl(source)
    ordered = sorted(collected, key=_event_sort_key, reverse=True)

    matches = []
    for event in ordered:
        keep = _matches_filters(
            event,
            since=since_dt,
            until=until_dt,
            agent=agent,
            event_type=event_type,
            limit_task_result=result,
        )
        if keep:
            matches.append(event)
    return matches[: max(1, limit)]
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
def _task_events(events: list[dict]) -> list[dict]:
    """Return the events whose ``event_type`` starts with ``task_``, in order."""
    selected = []
    for event in events:
        kind = str(event.get("event_type") or "")
        if kind.startswith("task_"):
            selected.append(event)
    return selected
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
def _aggregate_by(events: list[dict], key: str) -> dict[str, dict]:
    """Aggregate task events per agent, model or task type.

    ``key`` selects the grouping: ``"agent"``, ``"model"``, or anything else
    for the task's ``task_type``. Each group, in sorted order, reports
    ``count``, ``success_rate``, ``total_cost`` and ``avg_elapsed``.
    """

    def _group_of(event: dict) -> str:
        if key == "agent":
            return str(event.get("agent") or "unknown")
        if key == "model":
            return str(event.get("model") or "unknown")
        return str((event.get("task") or {}).get("task_type") or "unknown")

    totals: dict[str, dict] = {}
    for event in _task_events(events):
        entry = totals.setdefault(
            _group_of(event),
            {"count": 0, "success": 0, "cost": 0.0, "elapsed": 0.0},
        )
        entry["count"] += 1
        details = event.get("task") or {}
        if str(details.get("result") or "unknown") in SUCCESS_RESULTS:
            entry["success"] += 1
        entry["cost"] += float(details.get("cost_usd") or 0)
        entry["elapsed"] += float(details.get("elapsed_seconds") or 0)

    return {
        name: {
            "count": entry["count"],
            "success_rate": _safe_div(entry["success"], entry["count"]),
            "total_cost": round(entry["cost"], 4),
            "avg_elapsed": int(entry["elapsed"] / entry["count"]) if entry["count"] else 0,
        }
        for name, entry in sorted(totals.items())
    }
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
def _recommendations(stats: dict) -> list[str]:
    """Derive up to three human-readable recommendations from grouped stats.

    Args:
        stats: Mapping with optional ``by_model`` and ``by_task_type``
            entries; each maps a name to a metrics dict with ``count``,
            ``success_rate`` and (for models) ``total_cost``.

    Returns:
        A list of messages. The top-ranked model (highest success rate, then
        lowest cost, then name) is recommended when it has at least two
        tasks. The first task type, in name order, with at least two tasks
        and a success rate below 70% is flagged.
    """

    def _percent(rate: float) -> int:
        # rate * 100 can land just below the integer (0.58 * 100 ==
        # 57.99999999999999); clear that float noise before truncating.
        return int(round(rate * 100, 6))

    recommendations: list[str] = []
    by_model = stats.get("by_model") or {}
    by_task_type = stats.get("by_task_type") or {}
    if by_model:
        ranked = sorted(
            by_model.items(),
            key=lambda item: (-item[1]["success_rate"], item[1]["total_cost"], item[0]),
        )
        best_model, best_metrics = ranked[0]
        if best_metrics.get("count", 0) >= 2:
            recommendations.append(
                f"Use {best_model} more often ({_percent(best_metrics['success_rate'])}% success, ${best_metrics['total_cost']:.2f} total)"
            )
    for task_type, metrics in sorted(by_task_type.items()):
        if metrics.get("count", 0) >= 2 and metrics.get("success_rate", 0) < 0.7:
            recommendations.append(
                f"{task_type} tasks are underperforming ({_percent(metrics['success_rate'])}% success) — review prompts and model choice"
            )
            break  # only the first underperforming task type is reported
    return recommendations[:3]
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
def compute_stats(target: pathlib.Path | str, *, period: str = "30d", by: str = "all") -> dict:
    """Compute summary and grouped statistics over recorded events.

    Loads up to 10000 events for ``period``, archive included. The result
    always has a ``summary``. With ``by`` set to ``agent``, ``model`` or
    ``task_type`` only that grouping is added; any other value adds all
    three groupings.
    """
    root = _detect_target(target)
    events = load_events(root, since=period, limit=10000, include_archive=True)
    tasks = _task_events(events)
    results = [str((event.get("task") or {}).get("result") or "") for event in tasks]
    success_count = sum(1 for outcome in results if outcome in SUCCESS_RESULTS)
    fail_count = sum(1 for outcome in results if outcome in FAIL_RESULTS)
    total_cost = round(sum(float((event.get("task") or {}).get("cost_usd") or 0) for event in tasks), 4)
    grouped = {
        f"by_{dimension}": _aggregate_by(events, dimension)
        for dimension in ("agent", "model", "task_type")
    }
    summary = {
        "period": period,
        "event_count": len(events),
        "task_count": len(tasks),
        "success_rate": _safe_div(success_count, len(tasks)),
        "success_count": success_count,
        "failure_count": fail_count,
        "total_cost": total_cost,
        "recommendations": _recommendations(grouped),
    }
    if by in {"agent", "model", "task_type"}:
        selected = f"by_{by}"
        return {"summary": summary, selected: grouped[selected]}
    return {"summary": summary, **grouped}
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
def format_list(events: list[dict], *, since: str = "") -> str:
    """Render events as a human-readable table followed by a totals line.

    Each row shows the timestamp (to the minute), event type, agent/model,
    a short label, an outcome mark and, when present, cost and elapsed time.
    An empty ``events`` list yields the heading plus "No data yet.".
    """
    suffix = f" ({since})" if since else ""
    if not events:
        return f"Recent experience events{suffix}:\n\nNo data yet."

    rows = [f"Recent experience events{suffix}:\n"]
    ok_total = 0
    fail_total = 0
    cost_total = 0.0

    for event in events:
        task = event.get("task") or {}
        kind = event.get("event_type")
        goal = task.get("goal")

        outcome = str(task.get("result") or "unknown")
        mark = "•"
        if outcome in SUCCESS_RESULTS:
            ok_total += 1
            mark = "✅"
        elif outcome in FAIL_RESULTS:
            fail_total += 1
            mark = "❌"
        cost_total += float(task.get("cost_usd") or 0)

        # "agent" or "agent/model"; a model of "unknown" is not shown.
        who = str(event.get("agent") or "—")
        model = event.get("model")
        if model and str(model) != "unknown":
            who += f"/{model}"

        # Default label first, then the event-type specific overrides.
        label = sanitize_text(goal or kind or "", limit=28) or "metadata event"
        if kind == "graph_synced":
            graph = event.get("context") or {}
            nodes = int(graph.get("graph_nodes_used") or 0)
            edges = int(graph.get("graph_edges_used") or 0)
            label = f"{nodes} nodes, {edges} edges"
        if kind in {"session_saved", "session_resumed"} and goal:
            label = sanitize_text(goal, limit=28)

        extras = []
        cost = task.get("cost_usd")
        if cost:
            extras.append(f"${float(cost):.2f}")
        elapsed = task.get("elapsed_seconds")
        if elapsed:
            extras.append(f"{int(elapsed)}s")

        when = str(event.get("timestamp") or "")[:16].replace("T", " ")
        row = f"{when} | {kind or ''!s:<14} | {who:<20} | {label:<28} | {mark}"
        if extras:
            row += f" {' '.join(extras)}"
        rows.append(row)

    rows.append("")
    rows.append(f"Total: {len(events)} events | {ok_total} ✅ {fail_total} ❌ | ${cost_total:.2f} spent")
    return "\n".join(rows)
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
def format_stats(stats: dict) -> str:
    """Render a stats dict as text.

    Prints one section per aggregate present in ``stats`` (``by_agent``,
    ``by_model``, ``by_task_type``), rows ordered by descending success rate,
    then ascending total cost, then name, followed by any recommendations
    from the summary.  A summary with no ``event_count`` yields "No data yet.".
    """
    summary = stats.get("summary") or {}
    period = summary.get("period", "30d")
    if not summary.get("event_count"):
        return f"Experience stats ({period}):\n\nNo data yet."

    def rank(pair):
        # Best success rate first; cheaper and alphabetically earlier break ties.
        name, metrics = pair
        return (-metrics["success_rate"], metrics["total_cost"], name)

    out = [f"Experience stats ({period}):\n"]
    sections = (("By agent", "by_agent"), ("By model", "by_model"), ("By task type", "by_task_type"))
    for heading, key in sections:
        if key not in stats:
            continue
        out.append(f"{heading}:")
        groups = stats.get(key) or {}
        if groups:
            for name, metrics in sorted(groups.items(), key=rank):
                percent = int(metrics["success_rate"] * 100)
                out.append(
                    f" {name:<14} | {metrics['count']:>2} tasks | "
                    f"{percent:>3}% success | "
                    f"${metrics['total_cost']:.2f} | avg {metrics['avg_elapsed']}s"
                )
        else:
            out.append(" No data")
        out.append("")

    tips = summary.get("recommendations")
    if tips:
        out.append("Recommendations:")
        out.extend(f"- {tip}" for tip in summary["recommendations"])
    return "\n".join(out).rstrip()
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
def _auth_token() -> str:
    """Return the stored credential as a string, or "" when none is stored.

    Checks the ``api_key``, ``access_token`` and ``token`` fields of the
    saved auth state, in that order, and returns the first truthy one.
    """
    saved = auth.load_token() or {}
    for field in ("api_key", "access_token", "token"):
        credential = saved.get(field)
        if credential:
            return str(credential)
    return ""
|
|
720
|
+
|
|
721
|
+
|
|
722
|
+
def _auth_ready() -> bool:
    """Report whether a non-empty credential is currently stored."""
    credential = _auth_token()
    return True if credential else False
|
|
724
|
+
|
|
725
|
+
|
|
726
|
+
def _plan_name() -> str:
    """Return the lower-cased plan name from the saved auth state.

    The ``plan`` inside the ``license`` block wins over a top-level ``plan``;
    when neither is set the plan is ``"free"``.
    """
    saved = auth.load_token() or {}
    plan = (saved.get("license") or {}).get("plan")
    if not plan:
        plan = saved.get("plan") or "free"
    return str(plan).lower()
|
|
730
|
+
|
|
731
|
+
|
|
732
|
+
def batch_for_sync(target: pathlib.Path | str, *, limit: int = SYNC_BATCH_LIMIT) -> list[dict]:
    """Collect the next batch of not-yet-synced events, oldest first.

    Events are read without the archive, ordered by ``_event_sort_key`` and
    filtered against the saved sync cursor: anything older than the last
    synced timestamp is skipped, as is anything at exactly that timestamp
    whose id was already recorded as synced.  Collection stops once the
    batch holds ``limit`` events.
    """
    root = _detect_target(target)
    cursor_state = _load_sync_state(root)
    cursor_ts = str(cursor_state.get("last_synced_timestamp") or "")
    synced_ids = {str(item) for item in cursor_state.get("last_synced_ids") or []}

    def already_synced(stamp, ident) -> bool:
        # With no cursor recorded yet, every event is still pending.
        if not cursor_ts:
            return False
        if stamp < cursor_ts:
            return True
        return stamp == cursor_ts and ident in synced_ids

    batch: list[dict] = []
    candidates = load_events(root, limit=100000, include_archive=False)
    for candidate in sorted(candidates, key=_event_sort_key):
        stamp, ident = _event_identity(candidate)
        if already_synced(stamp, ident):
            continue
        batch.append(candidate)
        # The size check runs after the append, as in the original loop.
        if len(batch) >= limit:
            break
    return batch
|
|
749
|
+
|
|
750
|
+
|
|
751
|
+
def _send_http_events(events: list[dict]) -> tuple[bool, dict, bool]:
    """POST a batch of events to the experience ingest endpoint.

    Returns ``(ok, payload, transient)``:

    * ``ok`` – True only when the request succeeded and the body decoded.
    * ``payload`` – always a dict: the decoded response, or an ``error``
      description on failure.
    * ``transient`` – True when the failure is marked retryable (network
      errors, undecodable success bodies, HTTP status >= 500).

    Without a stored credential no request is made and a
    ``"not authenticated"`` error is returned.
    """
    token = _auth_token()
    if not token:
        return False, {"error": "not authenticated", "hint": "run: 0dai auth login"}, False
    body = json.dumps({"events": events}, ensure_ascii=False).encode("utf-8")
    req = urllib.request.Request(
        f"{API_BASE}/v1/experience/ingest",
        data=body,
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {token}",
            "User-Agent": "0dai-cli/experience",
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=20) as resp:
            payload = json.loads(resp.read().decode("utf-8"))
    except urllib.error.HTTPError as exc:
        try:
            payload = json.loads(exc.read().decode("utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError):
            payload = None
        # Callers use payload.get(...), so a JSON string/array error body is
        # replaced by the generic status-code error instead of crashing them.
        if not isinstance(payload, dict):
            payload = {"error": f"http_error:{exc.code}"}
        return False, payload, exc.code >= 500
    except (urllib.error.URLError, TimeoutError, OSError, json.JSONDecodeError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is handled like JSONDecodeError so an undecodable
        # success body is reported as a retryable failure instead of raising.
        return False, {"error": str(exc) or "network unavailable"}, True
    # The caller merges the payload with ``**payload``; a 2xx response whose
    # body is valid JSON but not an object is treated as an empty response.
    if not isinstance(payload, dict):
        payload = {}
    return True, payload, False
|
|
777
|
+
|
|
778
|
+
|
|
779
|
+
# --- Outgoing-sync gate (#4372): never let our scripts, know-how, or secrets
|
|
780
|
+
# leave the client. Every event is reduced to an allowlist and dropped wholesale
|
|
781
|
+
# if any retained value matches a secret pattern; internal repos can opt out. ---
|
|
782
|
+
|
|
783
|
+
_SYNC_ALLOWED_TOP = {
|
|
784
|
+
"event_id", "timestamp", "schema_version", "project_id", "event_type",
|
|
785
|
+
"agent", "model", "task_type", "task", "context", "quality",
|
|
786
|
+
}
|
|
787
|
+
_SYNC_ALLOWED_NESTED = {
|
|
788
|
+
"task": {"goal", "result", "task_type", "tests_passed"},
|
|
789
|
+
"context": {"stack", "commit_hash", "branch"},
|
|
790
|
+
"quality": {
|
|
791
|
+
"no_secrets", "lint_clean", "commit_message_valid",
|
|
792
|
+
"acceptance_criteria_met", "review_needed",
|
|
793
|
+
},
|
|
794
|
+
}
|
|
795
|
+
|
|
796
|
+
_SECRET_PATTERNS = tuple(
|
|
797
|
+
re.compile(p)
|
|
798
|
+
for p in (
|
|
799
|
+
r"AKIA[0-9A-Z]{16}",
|
|
800
|
+
r"gh[pousr]_[A-Za-z0-9]{20,}",
|
|
801
|
+
r"glpat-[A-Za-z0-9_-]{20,}",
|
|
802
|
+
r"sk-(?:ant-)?[A-Za-z0-9_-]{20,}",
|
|
803
|
+
r"AGE-SECRET-KEY-1[0-9A-Z]+",
|
|
804
|
+
r"xox[baprs]-[A-Za-z0-9-]{10,}",
|
|
805
|
+
r"-----BEGIN [A-Z ]*PRIVATE KEY-----",
|
|
806
|
+
r"eyJ[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}",
|
|
807
|
+
r"(?i)\b(?:api[_-]?key|secret|token|password|passwd|bearer)\b\s*[:=]\s*\S{12,}",
|
|
808
|
+
)
|
|
809
|
+
)
|
|
810
|
+
|
|
811
|
+
|
|
812
|
+
def _contains_secret(value: object) -> bool:
|
|
813
|
+
if isinstance(value, str):
|
|
814
|
+
return any(pat.search(value) for pat in _SECRET_PATTERNS)
|
|
815
|
+
if isinstance(value, dict):
|
|
816
|
+
return any(_contains_secret(v) for v in value.values())
|
|
817
|
+
if isinstance(value, (list, tuple)):
|
|
818
|
+
return any(_contains_secret(v) for v in value)
|
|
819
|
+
return False
|
|
820
|
+
|
|
821
|
+
|
|
822
|
+
def _allowlist(data: object, allowed: set) -> dict:
|
|
823
|
+
if not isinstance(data, dict):
|
|
824
|
+
return {}
|
|
825
|
+
return {k: v for k, v in data.items() if k in allowed}
|
|
826
|
+
|
|
827
|
+
|
|
828
|
+
def gate_outgoing_event(event: dict) -> dict | None:
|
|
829
|
+
"""Reduce an event to the sync allowlist; return None to drop it.
|
|
830
|
+
|
|
831
|
+
Drops the whole event if any retained value matches a secret pattern —
|
|
832
|
+
better to lose one event than leak a credential (#4372).
|
|
833
|
+
"""
|
|
834
|
+
if not isinstance(event, dict):
|
|
835
|
+
return None
|
|
836
|
+
gated = _allowlist(event, _SYNC_ALLOWED_TOP)
|
|
837
|
+
for key, allowed in _SYNC_ALLOWED_NESTED.items():
|
|
838
|
+
if isinstance(gated.get(key), dict):
|
|
839
|
+
gated[key] = _allowlist(gated[key], allowed)
|
|
840
|
+
if _contains_secret(gated):
|
|
841
|
+
return None
|
|
842
|
+
return gated
|
|
843
|
+
|
|
844
|
+
|
|
845
|
+
def gate_outgoing_events(events: list[dict]) -> tuple[list[dict], int]:
    """Gate a batch of events.

    Returns the reduced events that passed the gate, in input order, together
    with the number of events that were dropped.
    """
    verdicts = [gate_outgoing_event(candidate) for candidate in events]
    survivors = [verdict for verdict in verdicts if verdict is not None]
    return survivors, len(verdicts) - len(survivors)
|
|
855
|
+
|
|
856
|
+
|
|
857
|
+
def contribution_enabled(target: pathlib.Path | str = ".") -> bool:
|
|
858
|
+
"""Whether this repo may contribute experience to the shared pool.
|
|
859
|
+
|
|
860
|
+
Off when ODAI_EXPERIENCE_CONTRIBUTE is falsy, or project.yaml has
|
|
861
|
+
``experience.contribute: false``. Internal / dogfood repos opt out (#4372).
|
|
862
|
+
"""
|
|
863
|
+
env = os.environ.get("ODAI_EXPERIENCE_CONTRIBUTE", "").strip().lower()
|
|
864
|
+
if env in {"0", "false", "no", "off"}:
|
|
865
|
+
return False
|
|
866
|
+
if env in {"1", "true", "yes", "on"}:
|
|
867
|
+
return True
|
|
868
|
+
# PyYAML is optional; without it the project.yaml opt-out can't be read, so
|
|
869
|
+
# fall through to the default (contribute) rather than crashing (#4363).
|
|
870
|
+
if yaml is not None:
|
|
871
|
+
proj = _detect_target(target) / "ai" / "manifest" / "project.yaml"
|
|
872
|
+
try:
|
|
873
|
+
data = yaml.safe_load(proj.read_text(encoding="utf-8")) or {}
|
|
874
|
+
except (OSError, yaml.YAMLError):
|
|
875
|
+
pass
|
|
876
|
+
else:
|
|
877
|
+
if isinstance(data, dict):
|
|
878
|
+
experience = data.get("experience")
|
|
879
|
+
if isinstance(experience, dict) and "contribute" in experience:
|
|
880
|
+
val = experience.get("contribute")
|
|
881
|
+
if isinstance(val, bool):
|
|
882
|
+
return val
|
|
883
|
+
return str(val).strip().strip("\"'").lower() not in {"false", "no", "off", "0"}
|
|
884
|
+
return True
|
|
885
|
+
|
|
886
|
+
|
|
887
|
+
def sync_preflight(target: pathlib.Path | str, *, limit: int = SYNC_BATCH_LIMIT) -> dict:
    """Check whether a sync can run, without sending anything.

    The checks run in this order: contribution opt-out, plan eligibility
    (pro / team / enterprise), pending events, stored credential.  The
    returned dict always carries ``ok``, ``skipped``, ``reason``, ``plan``,
    ``authenticated`` and ``pending``; the auth-required result additionally
    carries ``hint`` and ``transient``.
    """
    root = _detect_target(target)

    if not contribution_enabled(root):
        return dict(
            ok=True,
            skipped=True,
            reason="contribution_disabled",
            plan=_plan_name(),
            authenticated=_auth_ready(),
            pending=0,
        )

    plan = _plan_name()
    if plan not in {"pro", "team", "enterprise"}:
        return dict(
            ok=False,
            skipped=True,
            reason="plan_not_eligible",
            plan=plan,
            authenticated=_auth_ready(),
            pending=0,
        )

    queued = len(batch_for_sync(root, limit=limit))
    signed_in = _auth_ready()

    # Credentials only matter when there is something to send.
    if queued and not signed_in:
        return dict(
            ok=False,
            skipped=True,
            reason="auth_required",
            hint="run: 0dai auth login",
            plan=plan,
            authenticated=False,
            pending=queued,
            transient=False,
        )

    return dict(
        ok=True,
        skipped=False,
        reason="",
        plan=plan,
        authenticated=signed_in,
        pending=queued,
    )
|
|
940
|
+
|
|
941
|
+
|
|
942
|
+
def sync_events(target: pathlib.Path | str, *, limit: int = SYNC_BATCH_LIMIT) -> dict:
    """Send the next batch of pending events and advance the sync cursor.

    Returns the preflight result unchanged when the sync is skipped, a
    zero-ingest result when nothing is pending, an error result when the
    send fails, and otherwise ``ok`` / ``ingested`` / ``dropped`` merged with
    the server response.
    """
    root = _detect_target(target)
    preflight = sync_preflight(root, limit=limit)
    if preflight.get("skipped"):
        return preflight
    if preflight.get("pending") == 0:
        return {"ok": True, "ingested": 0, "preflight": preflight}

    batch = batch_for_sync(root, limit=limit)
    if not batch:
        return {"ok": True, "ingested": 0, "preflight": preflight}

    # Gate before send: reduce each event to the allowlist and drop any event
    # carrying a secret. Never let our scripts/know-how/credentials leave (#4372).
    outgoing, dropped = gate_outgoing_events(batch)

    response: dict = {}
    if outgoing:
        sent, response, transient = _send_http_events(outgoing)
        if not sent:
            if str(response.get("error") or "") != "not authenticated":
                return {"ok": False, "error": response.get("error", "sync failed"), "transient": transient}
            return {
                "ok": False,
                "skipped": True,
                "reason": "auth_required",
                "hint": response.get("hint", "run: 0dai auth login"),
                "plan": preflight.get("plan"),
                "authenticated": False,
                "pending": len(batch),
                "transient": False,
            }

    # Reached on a successful send, or when the gate dropped every event.
    # The cursor moves past the whole batch, not only what was sent, so
    # gate-dropped events are not re-scanned on every sync.
    newest = batch[-1]
    cursor_ts = newest.get("timestamp")
    ids_at_cursor = [item.get("event_id") for item in batch if item.get("timestamp") == cursor_ts]
    new_state = {
        "last_synced_timestamp": cursor_ts,
        "last_synced_ids": ids_at_cursor[-SYNC_RETENTION_IDS:],
        "last_sync_response": response,
        "updated_at": _now(),
    }
    _save_sync_state(root, new_state)
    return {"ok": True, "ingested": len(outgoing), "dropped": dropped, **response}
|
|
986
|
+
|
|
987
|
+
|
|
988
|
+
def validate_event(event: dict) -> list[str]:
    """Return the list of problems found in ``event`` (empty when valid).

    Reports ``missing:<key>`` for each absent required key,
    ``invalid:event_type`` for an unsupported event type, and
    ``privacy:source_like_content`` when the JSON-serialised event contains
    a marker that looks like source code.
    """
    required_keys = (
        "event_id",
        "timestamp",
        "project_id",
        "event_type",
        "agent",
        "model",
        "task",
        "context",
        "quality",
    )
    problems: list[str] = [f"missing:{name}" for name in required_keys if name not in event]

    event_type = str(event.get("event_type") or "")
    if event_type not in SUPPORTED_EVENT_TYPES:
        problems.append("invalid:event_type")

    # Case-insensitive scan of the whole serialised event for code markers.
    haystack = json.dumps(event, ensure_ascii=False).lower()
    code_markers = ("```", "def ", "class ", "function ", "raw_code", "source_code", "\"content\"")
    for marker in code_markers:
        if marker in haystack:
            problems.append("privacy:source_like_content")
            break
    return problems
|
|
1000
|
+
|
|
1001
|
+
|
|
1002
|
+
def ingest_events(
    root_dir: pathlib.Path,
    events: list[dict],
    *,
    user_id: str,
    plan: str,
    device: str = "",
    cli_version: str = "",
) -> dict:
    """Validate and persist a submitted batch of events.

    Each event is copied and stamped with submission metadata
    (``_submitted_at``, ``_submitted_by``, ``_plan`` and, when given,
    ``_device`` truncated to 32 characters and ``_cli_version``), validated,
    and appended to the per-project event file.

    The batch is all-or-nothing: every event is validated before anything is
    written, so a rejected batch leaves no partial data behind for the
    client's retry to duplicate.

    Returns ``{"ok": True, "ingested": n}`` on success, or an ``ok: False``
    dict with ``error`` (and ``issues`` for validation failures).
    ``root_dir`` is accepted for interface compatibility and is not used here.
    """
    if plan not in {"pro", "team", "enterprise"}:
        return {"ok": False, "error": "Experience sync requires Pro plan"}

    # Pass 1: stamp and validate everything without touching storage.
    stamped: list[dict] = []
    for raw_event in events:
        event = dict(raw_event)
        event["_submitted_at"] = _now()
        event["_submitted_by"] = user_id
        event["_plan"] = plan
        if device:
            event["_device"] = device[:32]
        if cli_version:
            event["_cli_version"] = cli_version
        issues = validate_event(event)
        if issues:
            return {"ok": False, "error": "invalid events", "issues": issues}
        stamped.append(event)

    # Pass 2: the whole batch is valid, so persist it.
    for event in stamped:
        project_id = str(event.get("project_id") or "unknown")
        path = _server_event_path(project_id, str(event.get("timestamp") or _now()))
        append_jsonl(path, event)
    return {"ok": True, "ingested": len(stamped)}
|
|
1031
|
+
|
|
1032
|
+
|
|
1033
|
+
def cmd_list(args: argparse.Namespace) -> int:
    """Handle ``list``: print matching events as JSON or as a table; returns 0."""
    root = _detect_target(args.target)
    matches = load_events(
        root,
        since=args.since,
        agent=args.agent,
        event_type=args.type,
        result=args.result,
        limit=args.limit,
        include_archive=True,
    )
    if not args.json:
        print(format_list(matches, since=args.since))
        return 0
    document = {"events": matches, "total": len(matches)}
    print(json.dumps(document, indent=2, ensure_ascii=False))
    return 0
|
|
1049
|
+
|
|
1050
|
+
|
|
1051
|
+
def cmd_stats(args: argparse.Namespace) -> int:
    """Handle ``stats``: print aggregated statistics as JSON or text; returns 0."""
    root = _detect_target(args.target)
    stats = compute_stats(root, period=args.period, by=args.by)
    if args.json:
        rendered = json.dumps(stats, indent=2, ensure_ascii=False)
    else:
        rendered = format_stats(stats)
    print(rendered)
    return 0
|
|
1058
|
+
|
|
1059
|
+
|
|
1060
|
+
def cmd_record_json(args: argparse.Namespace) -> int:
    """Handle ``record-json``: record one event from a JSON payload.

    The payload comes from ``--payload`` or, when that is empty, from stdin;
    blank input is treated as ``{}``.  Returns 1 (after printing the parse
    error) when the payload is not valid JSON, otherwise 0.  The recorded
    event is echoed only when ``--json`` is set.
    """
    # Local import: use the documented ``sys.stdin`` rather than ``os.sys``,
    # which only works because the os module happens to import sys.
    import sys

    raw = args.payload or sys.stdin.read()
    try:
        payload = json.loads(raw.strip() or "{}")
    except json.JSONDecodeError as exc:
        print(f"invalid payload: {exc}")
        return 1
    event = record_event(_detect_target(args.target), payload)
    if args.json:
        print(json.dumps(event, indent=2, ensure_ascii=False))
    return 0
|
|
1070
|
+
|
|
1071
|
+
|
|
1072
|
+
def cmd_sync(args: argparse.Namespace) -> int:
    """Handle ``sync``: run a sync and report the outcome.

    With ``--json`` the raw result dict is printed.  Otherwise one line is
    printed: the skip reason, the number of synced events, or the error.
    Returns 0 when the sync succeeded or was skipped, 1 on error.
    """
    result = sync_events(_detect_target(args.target), limit=args.limit)
    if args.json:
        print(json.dumps(result, indent=2, ensure_ascii=False))
    elif result.get("skipped"):
        # Checked before "ok": a skipped sync can also be ok (contribution
        # disabled) and must not be reported as "synced 0 event(s)".
        print(f"skipped: {result.get('reason')}")
    elif result.get("ok"):
        print(f"synced {int(result.get('ingested', 0))} event(s)")
    else:
        print(f"error: {result.get('error', 'sync failed')}")
    return 0 if result.get("ok") or result.get("skipped") else 1
|
|
1083
|
+
|
|
1084
|
+
|
|
1085
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the ``0dai experience`` parser with its four subcommands.

    Every subcommand takes ``--target`` (default ".") and ``--json``; the
    chosen handler is stored on the parsed namespace as ``func``.
    """
    parser = argparse.ArgumentParser(prog="0dai experience", description="Structured experience event pipeline.")
    commands = parser.add_subparsers(dest="command")

    def register(name, handler, options):
        # --target always comes first and --json last, as in each subcommand.
        command = commands.add_parser(name)
        command.add_argument("--target", default=".")
        for flag, settings in options:
            command.add_argument(flag, **settings)
        command.add_argument("--json", action="store_true")
        command.set_defaults(func=handler)

    register(
        "list",
        cmd_list,
        [
            ("--since", {"default": "7d"}),
            ("--agent", {"default": ""}),
            ("--type", {"default": ""}),
            ("--result", {"default": ""}),
            ("--limit", {"type": int, "default": 50}),
        ],
    )
    register(
        "stats",
        cmd_stats,
        [
            ("--period", {"default": "30d"}),
            ("--by", {"default": "all", "choices": ["all", "agent", "model", "task_type"]}),
        ],
    )
    register(
        "record-json",
        cmd_record_json,
        [
            ("--payload", {"default": ""}),
        ],
    )
    register(
        "sync",
        cmd_sync,
        [
            ("--limit", {"type": int, "default": SYNC_BATCH_LIMIT}),
        ],
    )
    return parser
|
|
1118
|
+
|
|
1119
|
+
|
|
1120
|
+
def main(argv: list[str] | None = None) -> int:
    """CLI entry point.

    Parses ``argv`` and dispatches to the selected subcommand handler,
    returning its result as an int.  With no subcommand the help text is
    printed and 1 is returned.
    """
    parser = build_parser()
    namespace = parser.parse_args(argv)
    if getattr(namespace, "command", ""):
        return int(namespace.func(namespace))
    parser.print_help()
    return 1


# Script entry: exit with the status returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
|