arkaos 2.75.0 → 2.77.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/VERSION +1 -1
- package/config/hooks/stop.sh +35 -0
- package/config/hooks/user-prompt-submit.sh +20 -0
- package/core/governance/__pycache__/closing_marker_check.cpython-313.pyc +0 -0
- package/core/governance/closing_marker_check.py +65 -0
- package/core/runtime/__pycache__/codex_cli.cpython-313.pyc +0 -0
- package/core/runtime/__pycache__/llm_provider.cpython-313.pyc +0 -0
- package/core/runtime/codex_cli.py +22 -13
- package/core/runtime/llm_provider.py +15 -0
- package/package.json +1 -1
- package/pyproject.toml +1 -1
package/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
2.75.0
|
|
1
|
+
2.77.0
|
package/config/hooks/stop.sh
CHANGED
|
@@ -219,6 +219,38 @@ try:
|
|
|
219
219
|
except Exception:
|
|
220
220
|
pass
|
|
221
221
|
|
|
222
|
+
# PR59 v2.76.0 — Closing-marker soft block. Telemetry analysis showed
|
|
223
|
+
# 0% [arka:phase:13]/[arka:trivial] rate on flow-required turns. Persist
|
|
224
|
+
# result to /tmp/arkaos-closing/<session>.json so the next
|
|
225
|
+
# UserPromptSubmit can surface a nudge if missing.
|
|
226
|
+
closing_check_passed = True
|
|
227
|
+
closing_check_reason = "trivial"
|
|
228
|
+
closing_check_suggestion: str | None = None
|
|
229
|
+
try:
|
|
230
|
+
from core.governance.closing_marker_check import check_closing_marker
|
|
231
|
+
cmr = check_closing_marker(last)
|
|
232
|
+
closing_check_passed = cmr.passed
|
|
233
|
+
closing_check_reason = cmr.reason
|
|
234
|
+
closing_check_suggestion = cmr.suggestion
|
|
235
|
+
if safe_sid:
|
|
236
|
+
prev_umask = os.umask(0o077)
|
|
237
|
+
try:
|
|
238
|
+
closing_dir = Path("/tmp/arkaos-closing")
|
|
239
|
+
closing_dir.mkdir(parents=True, exist_ok=True)
|
|
240
|
+
closing_path = closing_dir / f"{safe_sid}.json"
|
|
241
|
+
closing_path.write_text(
|
|
242
|
+
json.dumps({
|
|
243
|
+
"passed": cmr.passed,
|
|
244
|
+
"reason": cmr.reason,
|
|
245
|
+
"suggestion": cmr.suggestion,
|
|
246
|
+
}),
|
|
247
|
+
encoding="utf-8",
|
|
248
|
+
)
|
|
249
|
+
finally:
|
|
250
|
+
os.umask(prev_umask)
|
|
251
|
+
except Exception:
|
|
252
|
+
pass
|
|
253
|
+
|
|
222
254
|
entry = {
|
|
223
255
|
"ts": datetime.now(timezone.utc).isoformat(),
|
|
224
256
|
"session_id": session_id,
|
|
@@ -237,6 +269,9 @@ entry = {
|
|
|
237
269
|
"kb_cite_topic_score": cite_topic_score,
|
|
238
270
|
"meta_tag_check_passed": meta_passed,
|
|
239
271
|
"meta_tag_check_reason": meta_reason,
|
|
272
|
+
# PR59 v2.76.0 — Closing-marker soft-block telemetry.
|
|
273
|
+
"closing_marker_check_passed": closing_check_passed,
|
|
274
|
+
"closing_marker_check_reason": closing_check_reason,
|
|
240
275
|
# PR46 v2.65.0 — Claude Code effort level captured for later analysis
|
|
241
276
|
# of nudge-suppression rates. Unset / unknown values land as "".
|
|
242
277
|
"effort_level": os.environ.get("EFFORT_LEVEL_VAL", ""),
|
|
package/config/hooks/user-prompt-submit.sh
CHANGED
|
@@ -426,6 +426,24 @@ if [ -n "$SESSION_ID" ] && [ "$_ARKA_SURFACE_NUDGES" = "true" ]; then
|
|
|
426
426
|
fi
|
|
427
427
|
fi
|
|
428
428
|
|
|
429
|
+
# ─── Closing-marker nudge (PR59 v2.76.0) ─────────────────────────────────
|
|
430
|
+
# Mirror of meta-tag nudge but for [arka:phase:13] / [arka:trivial]
|
|
431
|
+
# closing markers. One-shot; deleted after read.
|
|
432
|
+
_CLOSING_MARKER_NUDGE=""
|
|
433
|
+
if [ -n "$SESSION_ID" ] && [ "$_ARKA_SURFACE_NUDGES" = "true" ]; then
|
|
434
|
+
_CLOSING_FILE="/tmp/arkaos-closing/${SESSION_ID}.json"
|
|
435
|
+
if [ -f "$_CLOSING_FILE" ]; then
|
|
436
|
+
if command -v jq &>/dev/null; then
|
|
437
|
+
_CLOSING_PASSED=$(jq -r '.passed' "$_CLOSING_FILE" 2>/dev/null)
|
|
438
|
+
_CLOSING_SUGGEST=$(jq -r '.suggestion // ""' "$_CLOSING_FILE" 2>/dev/null)
|
|
439
|
+
if [ "$_CLOSING_PASSED" = "false" ] && [ -n "$_CLOSING_SUGGEST" ] && [ "$_CLOSING_SUGGEST" != "null" ]; then
|
|
440
|
+
_CLOSING_MARKER_NUDGE="[arka:suggest] ${_CLOSING_SUGGEST}"
|
|
441
|
+
fi
|
|
442
|
+
fi
|
|
443
|
+
rm -f "$_CLOSING_FILE" 2>/dev/null
|
|
444
|
+
fi
|
|
445
|
+
fi
|
|
446
|
+
|
|
429
447
|
# ─── Output ──────────────────────────────────────────────────────────────
|
|
430
448
|
_OUT_CONTEXT="${_ARKA_GREETING:-}${_SYNC_NOTICE:-}${_ROUTE_REMINDER}${_WORKFLOW_DIRECTIVE} $python_result"
|
|
431
449
|
[ -n "$_HYGIENE" ] && _OUT_CONTEXT="$_OUT_CONTEXT $_HYGIENE"
|
|
@@ -433,6 +451,8 @@ _OUT_CONTEXT="${_ARKA_GREETING:-}${_SYNC_NOTICE:-}${_ROUTE_REMINDER}${_WORKFLOW_
|
|
|
433
451
|
$_KB_CITE_NUDGE"
|
|
434
452
|
[ -n "$_META_TAG_NUDGE" ] && _OUT_CONTEXT="$_OUT_CONTEXT
|
|
435
453
|
$_META_TAG_NUDGE"
|
|
454
|
+
[ -n "$_CLOSING_MARKER_NUDGE" ] && _OUT_CONTEXT="$_OUT_CONTEXT
|
|
455
|
+
$_CLOSING_MARKER_NUDGE"
|
|
436
456
|
[ -n "$_ARKA_CONTEXT_HITS" ] && _OUT_CONTEXT="$_OUT_CONTEXT
|
|
437
457
|
$_ARKA_CONTEXT_HITS"
|
|
438
458
|
# Escape for JSON
|
|
package/core/governance/__pycache__/closing_marker_check.cpython-313.pyc
Binary file
|
|
package/core/governance/closing_marker_check.py
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""[arka:phase:13] / [arka:trivial] closing-marker soft-block (PR59 v2.76.0).
|
|
2
|
+
|
|
3
|
+
Response-side classifier. Inspects the closing assistant message of a
|
|
4
|
+
flow-required turn for the mandatory closure marker — either
|
|
5
|
+
``[arka:phase:13]`` (full flow completed) or ``[arka:trivial]``
|
|
6
|
+
(trivial bypass). Mirrors the contract of
|
|
7
|
+
``core.governance.meta_tag_check`` (PR30 v2.49.0) and
|
|
8
|
+
``core.governance.kb_cite_check`` (PR18 v2.40.0).
|
|
9
|
+
|
|
10
|
+
Telemetry analysis from the May 24-25 continuous-build session showed
|
|
11
|
+
**0% closing-marker rate** on every flow-required turn (5/5 rows
|
|
12
|
+
without ``[arka:phase:13]`` or ``[arka:trivial]``). PR59 surfaces the
|
|
13
|
+
gap to the next-turn nudge layer so the model is reminded to close
|
|
14
|
+
each flow-required turn with an explicit marker.
|
|
15
|
+
|
|
16
|
+
Soft-block contract — never raises. Hooks consume ClosingMarkerResult
|
|
17
|
+
and decide whether to surface a suggestion.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import re
|
|
23
|
+
from dataclasses import dataclass
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
_PHASE13_RE: re.Pattern[str] = re.compile(r"\[arka:phase:13\]", re.IGNORECASE)
|
|
27
|
+
_TRIVIAL_RE: re.Pattern[str] = re.compile(r"\[arka:trivial\]", re.IGNORECASE)
|
|
28
|
+
_TRIVIAL_WORD_THRESHOLD: int = 15
|
|
29
|
+
_SUGGESTION_TEXT: str = (
|
|
30
|
+
"Closing marker missing — end every flow-required turn with "
|
|
31
|
+
"`[arka:phase:13] <label>` (full canonical flow) or "
|
|
32
|
+
"`[arka:trivial] <reason>` (single-file edit < 10 lines, "
|
|
33
|
+
"imperative verb). Without the marker, telemetry can't confirm "
|
|
34
|
+
"the turn closed cleanly."
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass(frozen=True)
|
|
39
|
+
class ClosingMarkerResult:
|
|
40
|
+
"""Verdict of a closing-marker check. Immutable; safe to log as JSON."""
|
|
41
|
+
|
|
42
|
+
passed: bool
|
|
43
|
+
reason: str
|
|
44
|
+
suggestion: str | None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def check_closing_marker(response_text: str) -> ClosingMarkerResult:
|
|
48
|
+
"""Classify whether a response carries a closing flow marker.
|
|
49
|
+
|
|
50
|
+
Order matters: a SHORT response *with* a marker still counts as
|
|
51
|
+
`present` — the trivial-length bypass only short-circuits when no
|
|
52
|
+
marker is found.
|
|
53
|
+
"""
|
|
54
|
+
text = response_text or ""
|
|
55
|
+
if _PHASE13_RE.search(text):
|
|
56
|
+
return ClosingMarkerResult(True, "phase13", None)
|
|
57
|
+
if _TRIVIAL_RE.search(text):
|
|
58
|
+
return ClosingMarkerResult(True, "trivial", None)
|
|
59
|
+
if _is_trivial_length(text):
|
|
60
|
+
return ClosingMarkerResult(True, "trivial-length", None)
|
|
61
|
+
return ClosingMarkerResult(False, "missing", _SUGGESTION_TEXT)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _is_trivial_length(text: str) -> bool:
|
|
65
|
+
return len(text.split()) < _TRIVIAL_WORD_THRESHOLD
|
|
package/core/runtime/__pycache__/codex_cli.cpython-313.pyc
Binary file
|
|
package/core/runtime/__pycache__/llm_provider.cpython-313.pyc
Binary file
|
|
package/core/runtime/codex_cli.py
CHANGED
|
@@ -76,10 +76,16 @@ class CodexCliAdapter(RuntimeAdapter):
|
|
|
76
76
|
raise NotImplementedError("Use Codex CLI's native content search")
|
|
77
77
|
|
|
78
78
|
def headless_supported(self) -> bool:
|
|
79
|
-
#
|
|
80
|
-
#
|
|
81
|
-
#
|
|
82
|
-
|
|
79
|
+
# Auto-detect: headless is supported iff the `codex` binary is
|
|
80
|
+
# on PATH. When the operator installs Codex CLI later, this
|
|
81
|
+
# lights up without any code change (the headless_complete()
|
|
82
|
+
# method below already gates on shutil.which() too, so a missing
|
|
83
|
+
# binary will raise cleanly).
|
|
84
|
+
#
|
|
85
|
+
# Note: even when the binary is present, headless_complete()
|
|
86
|
+
# still raises until the invocation syntax is verified locally.
|
|
87
|
+
# See TODO(llm-agnostic) below for the verification checklist.
|
|
88
|
+
return shutil.which("codex") is not None
|
|
83
89
|
|
|
84
90
|
def headless_complete(
|
|
85
91
|
self,
|
|
@@ -96,14 +102,15 @@ class CodexCliAdapter(RuntimeAdapter):
|
|
|
96
102
|
)
|
|
97
103
|
# TODO(llm-agnostic): Implement real headless completion.
|
|
98
104
|
#
|
|
99
|
-
# Status as of 2026-
|
|
100
|
-
#
|
|
101
|
-
#
|
|
102
|
-
#
|
|
105
|
+
# Status as of 2026-05-25 (PR60): Codex CLI still not verified
|
|
106
|
+
# in any ArkaOS dev environment. headless_supported() now
|
|
107
|
+
# auto-detects the binary on PATH so this lights up the moment
|
|
108
|
+
# someone installs it — but the actual subprocess call below
|
|
109
|
+
# still needs syntax verification before we can stop refusing.
|
|
103
110
|
#
|
|
104
111
|
# Verification checklist for whoever picks this up:
|
|
105
112
|
# 1. Install: npm install -g @openai/codex-cli
|
|
106
|
-
# 2. Discover: codex --help
|
|
113
|
+
# 2. Discover: codex --help (confirm non-interactive flag)
|
|
107
114
|
# 3. Pattern: likely `codex exec "<prompt>"` or
|
|
108
115
|
# `codex --prompt "<prompt>" --format json`
|
|
109
116
|
# 4. Wire the subprocess call (mirror the Gemini adapter —
|
|
@@ -113,9 +120,11 @@ class CodexCliAdapter(RuntimeAdapter):
|
|
|
113
120
|
# SubagentProvider cleanly falls back to anthropic-direct or
|
|
114
121
|
# stub when this raises, so the chain keeps working.
|
|
115
122
|
raise NotImplementedError(
|
|
116
|
-
"Codex CLI headless mode requires
|
|
117
|
-
"
|
|
118
|
-
"
|
|
119
|
-
"
|
|
123
|
+
"Codex CLI headless mode requires verified invocation syntax. "
|
|
124
|
+
"The `codex` binary is on PATH but ArkaOS has not validated "
|
|
125
|
+
"the non-interactive call shape locally. "
|
|
126
|
+
"Verification steps: `codex --help`, then update "
|
|
127
|
+
"core/runtime/codex_cli.py::headless_complete to call the "
|
|
128
|
+
"discovered subprocess shape. "
|
|
120
129
|
"SubagentProvider will cleanly fall back to anthropic-direct or stub."
|
|
121
130
|
)
|
|
package/core/runtime/llm_provider.py
CHANGED
|
@@ -362,6 +362,19 @@ def get_llm_provider(config_path: Path | None = None) -> LLMProvider:
|
|
|
362
362
|
return last if last is not None else StubProvider()
|
|
363
363
|
|
|
364
364
|
|
|
365
|
+
def _current_category() -> str:
|
|
366
|
+
"""Resolve the per-call category from the environment.
|
|
367
|
+
|
|
368
|
+
PR60 v2.77.0 — orchestration layers can set
|
|
369
|
+
``ARKA_CALL_CATEGORY=skill:<slug>`` /
|
|
370
|
+
``subagent:<dept>`` / ``plugin:<id>`` / ``mcp:<server>`` before
|
|
371
|
+
invoking the provider so `/arka costs --by-category` (PR47) can
|
|
372
|
+
attribute spend. Returns "" when unset, which lands in the base
|
|
373
|
+
bucket (backward-compatible).
|
|
374
|
+
"""
|
|
375
|
+
return os.environ.get("ARKA_CALL_CATEGORY", "").strip()
|
|
376
|
+
|
|
377
|
+
|
|
365
378
|
def _log_fallback(preferred: str, selected: str, reason: str = "") -> None:
|
|
366
379
|
# Piggy-back on the cost telemetry file: zero-token, provider-only row.
|
|
367
380
|
# Downstream can group by provider to spot degraded chains.
|
|
@@ -373,6 +386,7 @@ def _log_fallback(preferred: str, selected: str, reason: str = "") -> None:
|
|
|
373
386
|
tokens_out=0,
|
|
374
387
|
cached_tokens=0,
|
|
375
388
|
estimated_cost_usd=None,
|
|
389
|
+
category=_current_category(),
|
|
376
390
|
)
|
|
377
391
|
|
|
378
392
|
|
|
@@ -391,4 +405,5 @@ def _record(session_id: str, provider: str, response: LLMResponse) -> None:
|
|
|
391
405
|
tokens_out=response.tokens_out,
|
|
392
406
|
cached_tokens=response.cached_tokens,
|
|
393
407
|
estimated_cost_usd=cost,
|
|
408
|
+
category=_current_category(),
|
|
394
409
|
)
|
package/package.json
CHANGED