arkaos 2.64.0 → 2.66.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/VERSION +1 -1
- package/config/hooks/stop.sh +17 -0
- package/config/hooks/user-prompt-submit.sh +17 -2
- package/core/runtime/__pycache__/llm_cost_telemetry.cpython-313.pyc +0 -0
- package/core/runtime/__pycache__/llm_cost_telemetry_cli.cpython-313.pyc +0 -0
- package/core/runtime/llm_cost_telemetry.py +9 -0
- package/core/runtime/llm_cost_telemetry_cli.py +11 -0
- package/package.json +1 -1
- package/pyproject.toml +1 -1
package/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
2.64.0
|
|
1
|
+
2.66.0
|
package/config/hooks/stop.sh
CHANGED
|
@@ -19,12 +19,25 @@ SESSION_ID=""
|
|
|
19
19
|
TRANSCRIPT_PATH=""
|
|
20
20
|
STOP_HOOK_ACTIVE=""
|
|
21
21
|
CWD=""
|
|
22
|
+
EFFORT_LEVEL=""
|
|
22
23
|
if command -v jq &>/dev/null; then
|
|
23
24
|
SESSION_ID=$(echo "$input" | jq -r '.session_id // ""' 2>/dev/null)
|
|
24
25
|
TRANSCRIPT_PATH=$(echo "$input" | jq -r '.transcript_path // ""' 2>/dev/null)
|
|
25
26
|
STOP_HOOK_ACTIVE=$(echo "$input" | jq -r '.stop_hook_active // ""' 2>/dev/null)
|
|
26
27
|
CWD=$(echo "$input" | jq -r '.cwd // ""' 2>/dev/null)
|
|
28
|
+
# PR46 v2.65.0 — Claude Code W19 ships effort.level in hook stdin and
|
|
29
|
+
# $CLAUDE_EFFORT env var. Soft-block checks (kb-cite, meta-tag) only
|
|
30
|
+
# run at high|xhigh; hard enforcement runs regardless.
|
|
31
|
+
EFFORT_LEVEL=$(echo "$input" | jq -r '.effort.level // ""' 2>/dev/null)
|
|
27
32
|
fi
|
|
33
|
+
# Fallback to env var if stdin didn't carry it
|
|
34
|
+
[ -z "$EFFORT_LEVEL" ] && EFFORT_LEVEL="${CLAUDE_EFFORT:-}"
|
|
35
|
+
|
|
36
|
+
# Telemetry-only signal. Soft-block checks (kb_cite, meta_tag, sycophancy)
|
|
37
|
+
# always run here because they're cheap and feed /arka compliance.
|
|
38
|
+
# What is effort-gated is the NUDGE SURFACING in user-prompt-submit.sh
|
|
39
|
+
# (whether the next turn sees a [arka:suggest] line). Record the level
|
|
40
|
+
# on the telemetry row so we can later analyze suppression rates.
|
|
28
41
|
|
|
29
42
|
# Prevent infinite loops when Stop hook was triggered by its own decision.
|
|
30
43
|
if [ "$STOP_HOOK_ACTIVE" = "true" ]; then
|
|
@@ -60,6 +73,7 @@ SESSION_ID_VAL="$SESSION_ID" \
|
|
|
60
73
|
TRANSCRIPT_PATH_VAL="$TRANSCRIPT_PATH" \
|
|
61
74
|
CWD_VAL="$CWD" \
|
|
62
75
|
ARKAOS_ROOT_VAL="$ARKAOS_ROOT" \
|
|
76
|
+
EFFORT_LEVEL_VAL="$EFFORT_LEVEL" \
|
|
63
77
|
python3 - <<'PY' 2>/dev/null
|
|
64
78
|
import json
|
|
65
79
|
import os
|
|
@@ -223,6 +237,9 @@ entry = {
|
|
|
223
237
|
"kb_cite_topic_score": cite_topic_score,
|
|
224
238
|
"meta_tag_check_passed": meta_passed,
|
|
225
239
|
"meta_tag_check_reason": meta_reason,
|
|
240
|
+
# PR46 v2.65.0 — Claude Code effort level captured for later analysis
|
|
241
|
+
# of nudge-suppression rates. Unset / unknown values land as "".
|
|
242
|
+
"effort_level": os.environ.get("EFFORT_LEVEL_VAL", ""),
|
|
226
243
|
"mode": "warn",
|
|
227
244
|
}
|
|
228
245
|
|
|
@@ -84,10 +84,25 @@ mkdir -p "$CACHE_DIR" 2>/dev/null
|
|
|
84
84
|
# ─── Extract user input from hook JSON ───────────────────────────────────
|
|
85
85
|
user_input=""
|
|
86
86
|
SESSION_ID=""
|
|
87
|
+
EFFORT_LEVEL=""
|
|
87
88
|
if command -v jq &>/dev/null; then
|
|
88
89
|
user_input=$(echo "$input" | jq -r '.userInput // .message // ""' 2>/dev/null)
|
|
89
90
|
SESSION_ID=$(echo "$input" | jq -r '.session_id // ""' 2>/dev/null)
|
|
91
|
+
# PR46 v2.65.0 — Claude Code W19 ships effort.level in hook stdin.
|
|
92
|
+
# Soft-block nudges (KB-first + meta-tag) are gated by effort: only
|
|
93
|
+
# surfaced at high|xhigh; low/medium skip the nudge to avoid forcing
|
|
94
|
+
# the model to comply with full contracts during cheap exploratory
|
|
95
|
+
# turns. Hard enforcement (PreToolUse flow_enforcer) runs regardless.
|
|
96
|
+
EFFORT_LEVEL=$(echo "$input" | jq -r '.effort.level // ""' 2>/dev/null)
|
|
90
97
|
fi
|
|
98
|
+
[ -z "$EFFORT_LEVEL" ] && EFFORT_LEVEL="${CLAUDE_EFFORT:-}"
|
|
99
|
+
|
|
100
|
+
# Decide whether soft-block nudges surface to the next turn.
|
|
101
|
+
_ARKA_SURFACE_NUDGES="true"
|
|
102
|
+
case "${EFFORT_LEVEL:-high}" in
|
|
103
|
+
low|medium) _ARKA_SURFACE_NUDGES="false" ;;
|
|
104
|
+
*) _ARKA_SURFACE_NUDGES="true" ;;
|
|
105
|
+
esac
|
|
91
106
|
|
|
92
107
|
# ─── Flow marker cache invalidation (v2 — new turn, reset ALLOW cache) ──
|
|
93
108
|
# Cheap, non-blocking, runs before Synapse so a stuck Python later cannot
|
|
@@ -376,7 +391,7 @@ fi
|
|
|
376
391
|
# the suggestion to the model in this turn's additionalContext. One-shot:
|
|
377
392
|
# the file is deleted after read so the nudge does not repeat across turns.
|
|
378
393
|
_KB_CITE_NUDGE=""
|
|
379
|
-
if [ -n "$SESSION_ID" ]; then
|
|
394
|
+
if [ -n "$SESSION_ID" ] && [ "$_ARKA_SURFACE_NUDGES" = "true" ]; then
|
|
380
395
|
_CITE_FILE="/tmp/arkaos-cite/${SESSION_ID}.json"
|
|
381
396
|
if [ -f "$_CITE_FILE" ]; then
|
|
382
397
|
if command -v jq &>/dev/null; then
|
|
@@ -397,7 +412,7 @@ fi
|
|
|
397
412
|
# Mirror of the KB citation nudge but for the [arka:meta] one-liner
|
|
398
413
|
# contract. One-shot; deleted after read.
|
|
399
414
|
_META_TAG_NUDGE=""
|
|
400
|
-
if [ -n "$SESSION_ID" ]; then
|
|
415
|
+
if [ -n "$SESSION_ID" ] && [ "$_ARKA_SURFACE_NUDGES" = "true" ]; then
|
|
401
416
|
_META_FILE="/tmp/arkaos-meta/${SESSION_ID}.json"
|
|
402
417
|
if [ -f "$_META_FILE" ]; then
|
|
403
418
|
if command -v jq &>/dev/null; then
|
|
Binary file
|
|
Binary file
|
|
@@ -66,9 +66,15 @@ def record_cost(
|
|
|
66
66
|
tokens_out: int,
|
|
67
67
|
cached_tokens: int,
|
|
68
68
|
estimated_cost_usd: float | None,
|
|
69
|
+
category: str = "",
|
|
69
70
|
) -> None:
|
|
70
71
|
"""Append one JSONL line describing an LLM call's cost.
|
|
71
72
|
|
|
73
|
+
`category` mirrors Claude Code v2.1.149's per-category usage
|
|
74
|
+
breakdown: ``"skill:<slug>"``, ``"subagent:<dept>"``,
|
|
75
|
+
``"plugin:<id>"``, ``"mcp:<server>"``, or ``""`` for base usage.
|
|
76
|
+
Free-form string — the aggregator groups whatever it sees.
|
|
77
|
+
|
|
72
78
|
Silently swallows all errors. Telemetry must never break a
|
|
73
79
|
completion call. The caller decides whether to compute the cost via
|
|
74
80
|
`core.runtime.pricing.estimate_cost_usd` or pass None.
|
|
@@ -87,6 +93,7 @@ def record_cost(
|
|
|
87
93
|
if estimated_cost_usd is not None
|
|
88
94
|
else None
|
|
89
95
|
),
|
|
96
|
+
"category": str(category or ""),
|
|
90
97
|
}
|
|
91
98
|
with _locked_append(_telemetry_path()) as fh:
|
|
92
99
|
fh.write(json.dumps(entry, ensure_ascii=False) + "\n")
|
|
@@ -133,6 +140,7 @@ class CostSummary:
|
|
|
133
140
|
call_count: int
|
|
134
141
|
by_provider: dict[str, dict[str, Any]] = field(default_factory=dict)
|
|
135
142
|
by_model: dict[str, dict[str, Any]] = field(default_factory=dict)
|
|
143
|
+
by_category: dict[str, dict[str, Any]] = field(default_factory=dict)
|
|
136
144
|
by_session: list[dict[str, Any]] = field(default_factory=list)
|
|
137
145
|
advisories: list[str] = field(default_factory=list)
|
|
138
146
|
corrupt_line_count: int = 0
|
|
@@ -291,6 +299,7 @@ def summarise(
|
|
|
291
299
|
call_count=finalised["call_count"],
|
|
292
300
|
by_provider=_group(entries, "provider"),
|
|
293
301
|
by_model=_group(entries, "model"),
|
|
302
|
+
by_category=_group(entries, "category"),
|
|
294
303
|
by_session=sessions,
|
|
295
304
|
advisories=_build_advisories(sessions, advisory_threshold_usd),
|
|
296
305
|
corrupt_line_count=corrupt,
|
|
@@ -89,6 +89,12 @@ def _render_sessions(rows: list[dict[str, Any]], title: str) -> list[str]:
|
|
|
89
89
|
return lines
|
|
90
90
|
|
|
91
91
|
|
|
92
|
+
def _has_category_data(group: dict[str, dict[str, Any]]) -> bool:
|
|
93
|
+
# The summariser always returns at least the "" bucket for legacy
|
|
94
|
+
# rows. Hide the section unless ≥ 1 row has a non-empty key.
|
|
95
|
+
return any(k.strip() for k in group.keys())
|
|
96
|
+
|
|
97
|
+
|
|
92
98
|
def _render_advisories(advisories: list[str]) -> list[str]:
|
|
93
99
|
if not advisories:
|
|
94
100
|
return []
|
|
@@ -101,6 +107,11 @@ def _format_summary(summary: CostSummary) -> str:
|
|
|
101
107
|
parts.append("")
|
|
102
108
|
parts.extend(_render_group("By provider", summary.by_provider))
|
|
103
109
|
parts.extend(_render_group("By model", summary.by_model))
|
|
110
|
+
# Per-category breakdown (Claude Code v2.1.149+): skill, subagent,
|
|
111
|
+
# plugin, mcp-server. Renders only when at least one categorised
|
|
112
|
+
# entry exists so old telemetry doesn't show an empty section.
|
|
113
|
+
if _has_category_data(summary.by_category):
|
|
114
|
+
parts.extend(_render_group("By category", summary.by_category))
|
|
104
115
|
parts.extend(_render_sessions(summary.by_session, "Top 10 sessions"))
|
|
105
116
|
parts.extend(_render_advisories(summary.advisories))
|
|
106
117
|
if summary.corrupt_line_count:
|
package/package.json
CHANGED