arkaos 3.73.1 → 3.74.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/VERSION +1 -1
- package/arka/skills/flow/SKILL.md +25 -1
- package/config/agent-ownership.yaml +13 -0
- package/config/constitution.yaml +5 -0
- package/config/hooks/post-tool-use.sh +43 -0
- package/core/governance/__pycache__/agent_experiences.cpython-313.pyc +0 -0
- package/core/governance/__pycache__/agent_experiences_cli.cpython-313.pyc +0 -0
- package/core/governance/__pycache__/cqo_experience_recorder.cpython-313.pyc +0 -0
- package/core/governance/agent_experiences.py +176 -0
- package/core/governance/agent_experiences_cli.py +98 -0
- package/core/governance/cqo_experience_recorder.py +172 -0
- package/core/synapse/__pycache__/agent_experiences_layer.cpython-313.pyc +0 -0
- package/core/synapse/__pycache__/engine.cpython-313.pyc +0 -0
- package/core/synapse/agent_experiences_layer.py +117 -0
- package/core/synapse/engine.py +5 -0
- package/installer/cli.js +5 -2
- package/package.json +1 -1
- package/pyproject.toml +1 -1
package/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
3.73.1
|
|
1
|
+
3.74.1
|
|
@@ -73,6 +73,16 @@ state the gap explicitly and propose filling it.
|
|
|
73
73
|
Dispatch specialists via the `Agent` tool. The squad lead from Phase 3
|
|
74
74
|
names them. Specialists run in parallel when work is independent.
|
|
75
75
|
|
|
76
|
+
**Experience injection (PR3 v3.74.0).** When a specialist is dispatched,
|
|
77
|
+
Synapse layer `L2.6 AgentExperiences`
|
|
78
|
+
(`core/synapse/agent_experiences_layer.py`) detects the
|
|
79
|
+
`[arka:dispatch] <from> -> <to>` marker and loads the top-5 most recent
|
|
80
|
+
`Experience` records for the target agent from
|
|
81
|
+
`~/.arkaos/agents/<agent_id>/experiences.jsonl`. The records list past
|
|
82
|
+
Quality Gate REJECTED verdicts with their blockers and patterns. The
|
|
83
|
+
dispatched specialist must read them and avoid repeating the failure
|
|
84
|
+
modes. Operator-side audit: `python -m core.governance.agent_experiences_cli list <agent_id>`.
|
|
85
|
+
|
|
76
86
|
**Dispatch must be announced (NON-NEGOTIABLE `dispatch-must-be-announced`).**
|
|
77
87
|
Immediately before each `Agent` tool call, emit on its own line:
|
|
78
88
|
|
|
@@ -140,7 +150,21 @@ For each item, in order:
|
|
|
140
150
|
injection, missing auth, data exposure.
|
|
141
151
|
- Fail → back to the todo.
|
|
142
152
|
5. **Quality Gate** — Marta (CQO) orchestrates the right specialists
|
|
143
|
-
for the area.
|
|
153
|
+
for the area.
|
|
154
|
+
|
|
155
|
+
**CQO dispatch convention (PR3.5 v3.74.1):** when invoking the `cqo`
|
|
156
|
+
subagent, the orchestrator MUST include the marker
|
|
157
|
+
`[arka:reviewing <agent_id>]` in the dispatch prompt, naming the
|
|
158
|
+
agent whose work is under review (e.g.
|
|
159
|
+
`[arka:reviewing tech-lead-paulo]`). On a REJECTED verdict, the
|
|
160
|
+
PostToolUse hook `config/hooks/post-tool-use.sh` reads the marker
|
|
161
|
+
plus the verdict text and auto-appends an `Experience` to that
|
|
162
|
+
agent's log — closing the QG learning loop without manual
|
|
163
|
+
bookkeeping. The L2.6 Synapse layer
|
|
164
|
+
(`core/synapse/agent_experiences_layer.py`) injects the lessons
|
|
165
|
+
into the next dispatch automatically. APPROVED verdicts produce no
|
|
166
|
+
record (only failures are lessons). If a specialist is
|
|
167
|
+
missing, stop and advise the user
|
|
144
168
|
to create one via `/arka personas` + provide the knowledge.
|
|
145
169
|
- Fail → back to the todo.
|
|
146
170
|
6. Document — save the completed work to Obsidian + vector DB.
|
|
@@ -122,6 +122,19 @@ ownership:
|
|
|
122
122
|
owners: [devops-eng]
|
|
123
123
|
reason: "Infrastructure-as-code requires devops specialist"
|
|
124
124
|
|
|
125
|
+
# PR3.5 v3.74.1 — installer + dashboard launcher coverage
|
|
126
|
+
- pattern: "installer/**/*.js"
|
|
127
|
+
owners: [devops-eng, senior-dev]
|
|
128
|
+
reason: "npx arkaos installer surface requires devops + backend review"
|
|
129
|
+
|
|
130
|
+
- pattern: "scripts/start-dashboard*"
|
|
131
|
+
owners: [devops-eng]
|
|
132
|
+
reason: "Dashboard launcher is operational devops surface"
|
|
133
|
+
|
|
134
|
+
- pattern: "scripts/dashboard-api.py"
|
|
135
|
+
owners: [devops-eng, senior-dev]
|
|
136
|
+
reason: "Dashboard API backend bridges installer ops + Python service code"
|
|
137
|
+
|
|
125
138
|
# ─── Core architecture ──────────────────────────────────────────────
|
|
126
139
|
- pattern: "core/workflow/**/*.py"
|
|
127
140
|
owners: [architect, senior-dev]
|
package/config/constitution.yaml
CHANGED
|
@@ -193,6 +193,11 @@ enforcement_levels:
|
|
|
193
193
|
rule: "Dispatch subagents only when task requires >3 Reads or >5 Greps or isolated context. Never parallel subagents sharing state. Prefer main thread for trivial tasks."
|
|
194
194
|
enforcement: "warning"
|
|
195
195
|
|
|
196
|
+
# ─── Rule added in PR3 Squad Intelligence Upgrade (2026-05-28) ───────
|
|
197
|
+
- id: agent-experience-persistence
|
|
198
|
+
rule: "Quality Gate REJECTED verdicts MUST result in an Experience record for the failing agent, written to ~/.arkaos/agents/<agent_id>/experiences.jsonl via core.governance.cqo_experience_recorder. Next dispatch of that agent loads the experiences via the Synapse L2.6 layer (core.synapse.agent_experiences_layer) so the lesson carries across sessions. Without this, REJECTED reports evaporate as conversation turns and the same structural mistakes recur."
|
|
199
|
+
enforcement: "PR3 v3.74.0 ships the storage, parser, and Synapse layer. v3.74.1 will wire a PostToolUse hook on the Agent tool that auto-records on REJECTED verdicts. Until then orchestrators MUST call cqo_experience_recorder.record_from_verdict() after every CQO dispatch. Audit via python -m core.governance.agent_experiences_cli list <agent_id>."
|
|
200
|
+
|
|
196
201
|
should:
|
|
197
202
|
description: "Best practices. Encouraged but not enforced."
|
|
198
203
|
rules:
|
|
@@ -72,6 +72,49 @@ except Exception:
|
|
|
72
72
|
fi
|
|
73
73
|
fi
|
|
74
74
|
|
|
75
|
+
# ─── CQO REJECTED auto-record (PR3.5 v3.74.1) ────────────────────────
|
|
76
|
+
# When a Task/Agent dispatch to subagent_type=cqo returns
|
|
77
|
+
# `Quality Gate Verdict: REJECTED`, append an Experience to the
|
|
78
|
+
# failing agent's log. The agent under review is identified by the
|
|
79
|
+
# `[arka:reviewing <agent_id>]` marker that the orchestrator MUST
|
|
80
|
+
# include in the CQO dispatch prompt (constitution rule
|
|
81
|
+
# `agent-experience-persistence`). Never blocks the hook.
|
|
82
|
+
if [ "$TOOL_NAME" = "Task" ] || [ "$TOOL_NAME" = "Agent" ]; then
|
|
83
|
+
SUBAGENT_TYPE=$(echo "$input" | jq -r '.tool_input.subagent_type // ""' 2>/dev/null)
|
|
84
|
+
if [ "$SUBAGENT_TYPE" = "cqo" ] && echo "$TOOL_OUTPUT" | grep -qE 'Quality Gate Verdict:[[:space:]]*REJECTED'; then
|
|
85
|
+
TOOL_INPUT_PROMPT=$(echo "$input" | jq -r '.tool_input.prompt // ""' 2>/dev/null)
|
|
86
|
+
REVIEWING_TARGET=$(printf '%s' "$TOOL_INPUT_PROMPT" \
|
|
87
|
+
| grep -oE '\[arka:reviewing[[:space:]]+[A-Za-z0-9_.-]+\]' \
|
|
88
|
+
| head -1 \
|
|
89
|
+
| sed -E 's/.*\[arka:reviewing[[:space:]]+([A-Za-z0-9_.-]+)\].*/\1/')
|
|
90
|
+
if [ -n "$REVIEWING_TARGET" ]; then
|
|
91
|
+
_AE_ROOT="${ARKAOS_ROOT:-}"
|
|
92
|
+
if [ -z "$_AE_ROOT" ] && [ -f "$HOME/.arkaos/.repo-path" ]; then
|
|
93
|
+
_AE_ROOT=$(cat "$HOME/.arkaos/.repo-path" 2>/dev/null)
|
|
94
|
+
fi
|
|
95
|
+
[ -z "$_AE_ROOT" ] && _AE_ROOT="$HOME/.arkaos"
|
|
96
|
+
VERDICT_TEXT="$TOOL_OUTPUT" \
|
|
97
|
+
AGENT_ID="$REVIEWING_TARGET" \
|
|
98
|
+
SESSION_ID="$SESSION_ID_PTU" \
|
|
99
|
+
ARKAOS_ROOT="$_AE_ROOT" \
|
|
100
|
+
python3 - <<'PY' 2>/dev/null || true
|
|
101
|
+
import os, sys
|
|
102
|
+
sys.path.insert(0, os.environ["ARKAOS_ROOT"])
|
|
103
|
+
try:
|
|
104
|
+
from core.governance.cqo_experience_recorder import record_from_verdict
|
|
105
|
+
record_from_verdict(
|
|
106
|
+
verdict_text=os.environ.get("VERDICT_TEXT", ""),
|
|
107
|
+
agent_id=os.environ.get("AGENT_ID", ""),
|
|
108
|
+
session_id=os.environ.get("SESSION_ID", ""),
|
|
109
|
+
context="auto-recorded via PostToolUse hook (cqo dispatch REJECTED)",
|
|
110
|
+
)
|
|
111
|
+
except Exception:
|
|
112
|
+
pass
|
|
113
|
+
PY
|
|
114
|
+
fi
|
|
115
|
+
fi
|
|
116
|
+
fi
|
|
117
|
+
|
|
75
118
|
# Only process if there was an error
|
|
76
119
|
if [ "$EXIT_CODE" = "0" ] || [ -z "$EXIT_CODE" ]; then
|
|
77
120
|
# Also check for error patterns in output even with exit code 0
|
|
Binary file
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"""Agent experience persistence — Quality Gate feedback loop store.
|
|
2
|
+
|
|
3
|
+
When Marta (CQO) rejects an agent's work, an `Experience` record is
|
|
4
|
+
appended to `~/.arkaos/agents/<agent_id>/experiences.jsonl`. The next
|
|
5
|
+
time that agent is dispatched, recent experiences are injected as
|
|
6
|
+
context so the agent inherits prior failures across sessions.
|
|
7
|
+
|
|
8
|
+
This closes the long-standing QG learning gap: rejection reports used
|
|
9
|
+
to live only in the PR thread; the agent that failed had no way to
|
|
10
|
+
recall the structural mistake on the next pass. The Paulo of next
|
|
11
|
+
month now sees what the Paulo of today learned the hard way.
|
|
12
|
+
|
|
13
|
+
PR3 of the Squad Intelligence Upgrade.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import json
|
|
19
|
+
from contextlib import contextmanager
|
|
20
|
+
from dataclasses import asdict, dataclass, field
|
|
21
|
+
from datetime import datetime
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
from core.shared import safe_session_id as _safe_session_id_module
|
|
25
|
+
|
|
26
|
+
try:
|
|
27
|
+
import fcntl # POSIX only
|
|
28
|
+
_HAS_FLOCK = True
|
|
29
|
+
except ImportError:
|
|
30
|
+
_HAS_FLOCK = False
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
AGENTS_ROOT: Path = Path.home() / ".arkaos" / "agents"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
|
|
37
|
+
class Experience:
|
|
38
|
+
"""One QG verdict (or other lesson) captured for an agent.
|
|
39
|
+
|
|
40
|
+
`patterns` is a list (not a single string) because a verdict can fail
|
|
41
|
+
on multiple structural issues at once — e.g. function-length AND
|
|
42
|
+
governance-gap. PR3 v3.74.0 changed from `pattern: str | None` to
|
|
43
|
+
`patterns: list[str]` after Marta's QG-B6 ruled first-match-wins was
|
|
44
|
+
masking secondary patterns.
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
ts: str
|
|
48
|
+
agent_id: str
|
|
49
|
+
session_id: str
|
|
50
|
+
context: str
|
|
51
|
+
verdict: str
|
|
52
|
+
blockers: list[str] = field(default_factory=list)
|
|
53
|
+
patterns: list[str] = field(default_factory=list)
|
|
54
|
+
fix_applied: str | None = None
|
|
55
|
+
references: list[str] = field(default_factory=list)
|
|
56
|
+
tags: list[str] = field(default_factory=list)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def experience_to_dict(exp: Experience) -> dict:
|
|
60
|
+
"""Public serialiser for callers that need to persist outside this store."""
|
|
61
|
+
return asdict(exp)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@contextmanager
|
|
65
|
+
def _locked_append(path: Path):
|
|
66
|
+
"""Append to `path` under POSIX flock; Windows falls back to O_APPEND atomicity."""
|
|
67
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
68
|
+
fh = path.open("a", encoding="utf-8")
|
|
69
|
+
try:
|
|
70
|
+
if _HAS_FLOCK:
|
|
71
|
+
fcntl.flock(fh.fileno(), fcntl.LOCK_EX)
|
|
72
|
+
yield fh
|
|
73
|
+
finally:
|
|
74
|
+
if _HAS_FLOCK:
|
|
75
|
+
try:
|
|
76
|
+
fcntl.flock(fh.fileno(), fcntl.LOCK_UN)
|
|
77
|
+
except OSError:
|
|
78
|
+
pass
|
|
79
|
+
fh.close()
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _safe_agent_id(agent_id: str) -> str | None:
|
|
83
|
+
"""Apply the same allowlist as session IDs (CWE-22 path-traversal guard)."""
|
|
84
|
+
return _safe_session_id_module.safe_session_id(agent_id)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _path_for(agent_id: str) -> Path | None:
    """Return the experiences JSONL path for `agent_id`, or None if the id is unsafe.

    The id is first passed through `_safe_agent_id`; a None result means it
    failed the allowlist and no path is produced.

    The directory name is lower-cased so that writers and readers agree on
    one location: the Synapse dispatch parser lower-cases the target it
    extracts from `[arka:dispatch]`, while the PostToolUse hook forwards the
    `[arka:reviewing <agent_id>]` id with its original casing. Without a
    shared normalisation a mixed-case id would be recorded in a directory
    that the next dispatch never reads on case-sensitive filesystems.
    """
    safe = _safe_agent_id(agent_id)
    if safe is None:
        return None
    # Normalise only after validation: lower-casing an already-allowlisted
    # id cannot introduce path separators or traversal sequences.
    # NOTE(review): if safe_session_id already lower-cases, this is a no-op.
    return AGENTS_ROOT / safe.lower() / "experiences.jsonl"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def record_experience(experience: Experience) -> None:
|
|
95
|
+
"""Append an experience to the agent's JSONL.
|
|
96
|
+
|
|
97
|
+
Silently drops the record when the agent_id fails the safe-id check
|
|
98
|
+
or when filesystem I/O fails — recording must never block whatever
|
|
99
|
+
triggered the QG verdict.
|
|
100
|
+
"""
|
|
101
|
+
path = _path_for(experience.agent_id)
|
|
102
|
+
if path is None:
|
|
103
|
+
return
|
|
104
|
+
try:
|
|
105
|
+
with _locked_append(path) as fh:
|
|
106
|
+
fh.write(json.dumps(asdict(experience)) + "\n")
|
|
107
|
+
except OSError:
|
|
108
|
+
return
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _parse_entry(line: str) -> Experience | None:
|
|
112
|
+
"""Decode one JSONL line into an Experience, or return None on bad input."""
|
|
113
|
+
try:
|
|
114
|
+
data = json.loads(line)
|
|
115
|
+
except json.JSONDecodeError:
|
|
116
|
+
return None
|
|
117
|
+
try:
|
|
118
|
+
return Experience(**data)
|
|
119
|
+
except (TypeError, ValueError):
|
|
120
|
+
return None
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _as_utc(moment: datetime) -> datetime:
    """Return `moment` as a timezone-aware datetime, treating naive values as UTC."""
    from datetime import timezone  # local import: the module only imports `datetime`

    if moment.tzinfo is None or moment.utcoffset() is None:
        return moment.replace(tzinfo=timezone.utc)
    return moment


def _filter_entry(
    exp: Experience, since: datetime | None, tag: str | None
) -> bool:
    """Return True if the entry passes both the `since` and the `tag` filter.

    Args:
        exp: Record to test; only its `ts` and `tags` attributes are read.
        since: Cut-off. Entries whose timestamp is strictly earlier are
            rejected. None disables the time filter.
        tag: Required tag. None disables the tag filter.

    An entry whose `ts` cannot be parsed as an ISO datetime is rejected
    whenever a `since` cut-off is supplied.
    """
    if since is not None:
        try:
            ts = datetime.fromisoformat(exp.ts)
        except (TypeError, ValueError):
            return False
        # Recorded timestamps are UTC-aware while a caller-supplied cut-off
        # may be naive (e.g. a bare date from the CLI). Comparing the two
        # directly raises TypeError, so both sides are normalised first.
        if _as_utc(ts) < _as_utc(since):
            return False
    if tag is not None and tag not in (exp.tags or []):
        return False
    return True
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _read_entries(
|
|
140
|
+
path: Path, since: datetime | None, tag: str | None
|
|
141
|
+
) -> list[Experience]:
|
|
142
|
+
"""Parse the JSONL and apply filters. Empty on I/O error."""
|
|
143
|
+
entries: list[Experience] = []
|
|
144
|
+
try:
|
|
145
|
+
with path.open(encoding="utf-8") as fh:
|
|
146
|
+
for line in fh:
|
|
147
|
+
if not line.strip():
|
|
148
|
+
continue
|
|
149
|
+
exp = _parse_entry(line)
|
|
150
|
+
if exp is None:
|
|
151
|
+
continue
|
|
152
|
+
if _filter_entry(exp, since, tag):
|
|
153
|
+
entries.append(exp)
|
|
154
|
+
except OSError:
|
|
155
|
+
return []
|
|
156
|
+
return entries
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def query_experiences(
|
|
160
|
+
agent_id: str,
|
|
161
|
+
*,
|
|
162
|
+
limit: int = 5,
|
|
163
|
+
since: datetime | None = None,
|
|
164
|
+
tag: str | None = None,
|
|
165
|
+
) -> list[Experience]:
|
|
166
|
+
"""Read experiences for an agent. Most recent first.
|
|
167
|
+
|
|
168
|
+
Empty list when the agent has no record or the agent_id is unsafe.
|
|
169
|
+
Malformed JSONL lines are skipped silently.
|
|
170
|
+
"""
|
|
171
|
+
path = _path_for(agent_id)
|
|
172
|
+
if path is None or not path.exists():
|
|
173
|
+
return []
|
|
174
|
+
entries = _read_entries(path, since, tag)
|
|
175
|
+
entries.sort(key=lambda e: e.ts, reverse=True)
|
|
176
|
+
return entries[:limit]
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""CLI viewer for agent experiences.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
python -m core.governance.agent_experiences_cli list <agent_id> [options]
|
|
5
|
+
|
|
6
|
+
Options:
|
|
7
|
+
--limit N Show at most N most-recent experiences (default 10)
|
|
8
|
+
--since DATE ISO date or datetime (e.g. 2026-05-01)
|
|
9
|
+
--tag TAG Show only entries with this tag
|
|
10
|
+
|
|
11
|
+
Examples:
|
|
12
|
+
python -m core.governance.agent_experiences_cli list tech-lead-paulo
|
|
13
|
+
python -m core.governance.agent_experiences_cli list cqo-marta --limit 5
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import argparse
|
|
19
|
+
import sys
|
|
20
|
+
from datetime import datetime
|
|
21
|
+
|
|
22
|
+
from core.governance.agent_experiences import query_experiences
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _format_experience(exp, index: int) -> str:
|
|
26
|
+
lines = [
|
|
27
|
+
f" [{index}] {exp.ts} {exp.verdict} {exp.context}",
|
|
28
|
+
]
|
|
29
|
+
if exp.patterns:
|
|
30
|
+
lines.append(f" patterns: {', '.join(exp.patterns)}")
|
|
31
|
+
for blocker in (exp.blockers or [])[:5]:
|
|
32
|
+
lines.append(f" - {blocker}")
|
|
33
|
+
if exp.fix_applied:
|
|
34
|
+
lines.append(f" fix: {exp.fix_applied}")
|
|
35
|
+
if exp.references:
|
|
36
|
+
refs = ", ".join(exp.references[:3])
|
|
37
|
+
lines.append(f" refs: {refs}")
|
|
38
|
+
if exp.tags:
|
|
39
|
+
lines.append(f" tags: {', '.join(exp.tags)}")
|
|
40
|
+
return "\n".join(lines)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _parse_since(value: str) -> datetime:
    """Parse a `--since` argument into a timezone-aware datetime.

    Accepts an ISO date (YYYY-MM-DD), an ISO datetime with or without a UTC
    offset, or anything else `datetime.fromisoformat` understands.

    Values without an explicit offset are interpreted as UTC. Experience
    timestamps are written as UTC-aware ISO strings, and a naive cut-off
    cannot be compared against them.

    Raises:
        SystemExit: with an error message when `value` matches no format.
    """
    from datetime import timezone  # local import: the module only imports `datetime`

    parsed = None
    for fmt in ("%Y-%m-%dT%H:%M:%S%z", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%d"):
        try:
            parsed = datetime.strptime(value, fmt)
            break
        except ValueError:
            continue
    if parsed is None:
        # Last resort: let the stdlib ISO parser try (fractional seconds, etc.).
        try:
            parsed = datetime.fromisoformat(value)
        except ValueError as exc:
            raise SystemExit(f"error: invalid --since value: {value}") from exc
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _build_parser() -> argparse.ArgumentParser:
|
|
57
|
+
parser = argparse.ArgumentParser(
|
|
58
|
+
prog="python -m core.governance.agent_experiences_cli",
|
|
59
|
+
description="Inspect Quality Gate experience records for an agent.",
|
|
60
|
+
)
|
|
61
|
+
subparsers = parser.add_subparsers(dest="cmd", required=True)
|
|
62
|
+
list_p = subparsers.add_parser("list", help="List experiences for an agent.")
|
|
63
|
+
list_p.add_argument("agent_id", help="Agent ID, e.g. tech-lead-paulo")
|
|
64
|
+
list_p.add_argument("--limit", type=int, default=10, help="Max records (default 10)")
|
|
65
|
+
list_p.add_argument("--since", default=None, help="ISO date or datetime cutoff")
|
|
66
|
+
list_p.add_argument("--tag", default=None, help="Filter by tag")
|
|
67
|
+
return parser
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _print_results(agent_id: str, experiences: list) -> int:
|
|
71
|
+
if not experiences:
|
|
72
|
+
print(f"No experiences recorded for {agent_id}.")
|
|
73
|
+
return 0
|
|
74
|
+
print(
|
|
75
|
+
f"Experiences for {agent_id} "
|
|
76
|
+
f"({len(experiences)} record(s), most recent first):\n"
|
|
77
|
+
)
|
|
78
|
+
for i, exp in enumerate(experiences, start=1):
|
|
79
|
+
print(_format_experience(exp, i))
|
|
80
|
+
print()
|
|
81
|
+
return 0
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def main(argv: list[str] | None = None) -> int:
|
|
85
|
+
parser = _build_parser()
|
|
86
|
+
args = parser.parse_args(argv if argv is not None else sys.argv[1:])
|
|
87
|
+
if args.cmd != "list":
|
|
88
|
+
parser.print_help()
|
|
89
|
+
return 2
|
|
90
|
+
since = _parse_since(args.since) if args.since else None
|
|
91
|
+
experiences = query_experiences(
|
|
92
|
+
args.agent_id, limit=args.limit, since=since, tag=args.tag
|
|
93
|
+
)
|
|
94
|
+
return _print_results(args.agent_id, experiences)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
if __name__ == "__main__": # pragma: no cover
|
|
98
|
+
sys.exit(main())
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"""Parse Marta (CQO) verdict text and persist Experience records.
|
|
2
|
+
|
|
3
|
+
When the orchestrator dispatches the `cqo` subagent for a Quality Gate
|
|
4
|
+
review, Marta returns a verdict in a stable format (`Quality Gate
|
|
5
|
+
Verdict: APPROVED|REJECTED`, with blockers labelled `B1.`, `B2.`,
|
|
6
|
+
`M1.`, ...). This module parses that text and, when the verdict is
|
|
7
|
+
REJECTED, appends an `Experience` to the failing agent's log so future
|
|
8
|
+
dispatches inherit the lesson.
|
|
9
|
+
|
|
10
|
+
For PR3 v1 the recorder was invoked manually by the orchestrator after a
|
|
11
|
+
CQO dispatch. Since v3.74.1 the PostToolUse hook
|
|
12
|
+
(`config/hooks/post-tool-use.sh`) calls it automatically on REJECTED verdicts.
|
|
13
|
+
|
|
14
|
+
PR3 of the Squad Intelligence Upgrade.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import re
|
|
20
|
+
from dataclasses import dataclass
|
|
21
|
+
from datetime import datetime, timezone
|
|
22
|
+
|
|
23
|
+
from core.governance.agent_experiences import (
|
|
24
|
+
Experience,
|
|
25
|
+
record_experience,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
_VERDICT_RE = re.compile(
|
|
30
|
+
r"Quality Gate Verdict:\s*(APPROVED|REJECTED)", re.IGNORECASE
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
# Blocker headings used by Marta across the codebase. Examples observed:
|
|
34
|
+
# `**B1.` `**B2.` `**M1.` (markdown bold + dot/colon)
|
|
35
|
+
# `B1.` `B1:` (plain)
|
|
36
|
+
# `B1 description` (space-only separator — PR3 v3.74.0 widened per QG B5)
|
|
37
|
+
# `B10.` `B11.` (double-digit labels)
|
|
38
|
+
#
|
|
39
|
+
# Documented limitation: inline blocker references mid-paragraph
|
|
40
|
+
# (e.g., "The reviewer noted B1. is problematic") are NOT extracted —
|
|
41
|
+
# only line-anchored labels qualify. This is intentional to keep
|
|
42
|
+
# false-positive rate low; if we ever need inline capture, add a
|
|
43
|
+
# separate pass with a stricter context check.
|
|
44
|
+
_BLOCKER_RE = re.compile(
|
|
45
|
+
r"^(?:\*\*)?\s*([BMN])(\d+)[\s\.:](?:\s*\*\*)?\s*(.+?)(?:\*\*)?$",
|
|
46
|
+
re.MULTILINE,
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
# Common pattern hints Marta surfaces. Every matching hint is reported; tuple order only sets the order of the returned labels.
|
|
50
|
+
_PATTERN_HINTS: tuple[tuple[str, str], ...] = (
|
|
51
|
+
(r"function length|line ceiling|\d+\s+lines?|exceeds.*line", "function-length-violation"),
|
|
52
|
+
(r"command[ -]injection|CWE-77|shell escape", "command-injection-risk"),
|
|
53
|
+
(r"path[ -]traversal|CWE-22", "path-traversal-risk"),
|
|
54
|
+
(r"undocumented|missing.*constitution|not in flow", "governance-gap"),
|
|
55
|
+
(r"missing.*test|zero.*coverage|no pytest", "test-coverage-gap"),
|
|
56
|
+
(r"workaround|hack|shortcut|TODO", "shortcut-applied"),
|
|
57
|
+
(r"client name|leak|confidential", "confidentiality-risk"),
|
|
58
|
+
(r"sycophancy|yes[- ]man|capitulat", "sycophancy-violation"),
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass
|
|
63
|
+
class ParsedVerdict:
|
|
64
|
+
"""Structured view of a Marta verdict string."""
|
|
65
|
+
|
|
66
|
+
verdict: str # "APPROVED" | "REJECTED" | "UNKNOWN"
|
|
67
|
+
blockers: list[str]
|
|
68
|
+
patterns: list[str]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def parse_cqo_verdict(text: str) -> ParsedVerdict:
|
|
72
|
+
"""Extract verdict, blocker list, and ALL matching pattern hints."""
|
|
73
|
+
if not text:
|
|
74
|
+
return ParsedVerdict(verdict="UNKNOWN", blockers=[], patterns=[])
|
|
75
|
+
verdict = _extract_verdict(text)
|
|
76
|
+
blockers = _extract_blockers(text) if verdict == "REJECTED" else []
|
|
77
|
+
patterns = _classify_patterns(text) if verdict == "REJECTED" else []
|
|
78
|
+
return ParsedVerdict(verdict=verdict, blockers=blockers, patterns=patterns)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _extract_verdict(text: str) -> str:
|
|
82
|
+
match = _VERDICT_RE.search(text)
|
|
83
|
+
if not match:
|
|
84
|
+
return "UNKNOWN"
|
|
85
|
+
return match.group(1).upper()
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _extract_blockers(text: str) -> list[str]:
|
|
89
|
+
"""Capture lines that start with a blocker label (B/M/N + digits)."""
|
|
90
|
+
blockers: list[str] = []
|
|
91
|
+
for match in _BLOCKER_RE.finditer(text):
|
|
92
|
+
kind, num, headline = match.group(1), match.group(2), match.group(3)
|
|
93
|
+
# Strip markdown markers and trailing whitespace.
|
|
94
|
+
headline = headline.replace("**", "").strip()
|
|
95
|
+
# Cap headline length so a single misformatted line cannot dominate.
|
|
96
|
+
if len(headline) > 200:
|
|
97
|
+
headline = headline[:197] + "..."
|
|
98
|
+
blockers.append(f"{kind}{num}: {headline}")
|
|
99
|
+
return blockers
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _classify_patterns(text: str) -> list[str]:
|
|
103
|
+
"""Return ALL matching pattern labels, in registry order.
|
|
104
|
+
|
|
105
|
+
First-match-wins was masking secondary patterns (PR3 QG-B6): a
|
|
106
|
+
verdict citing both governance-gap and function-length would be
|
|
107
|
+
classified only as function-length, and the agent would miss the
|
|
108
|
+
structural lesson. Returning all matches lets the dispatched agent
|
|
109
|
+
see every category at once.
|
|
110
|
+
"""
|
|
111
|
+
lowered = text.lower()
|
|
112
|
+
matched: list[str] = []
|
|
113
|
+
for pattern, label in _PATTERN_HINTS:
|
|
114
|
+
if re.search(pattern, lowered, re.IGNORECASE):
|
|
115
|
+
matched.append(label)
|
|
116
|
+
return matched
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _build_experience(
|
|
120
|
+
parsed: "ParsedVerdict",
|
|
121
|
+
*,
|
|
122
|
+
agent_id: str,
|
|
123
|
+
session_id: str,
|
|
124
|
+
context: str,
|
|
125
|
+
references: list[str] | None,
|
|
126
|
+
tags: list[str] | None,
|
|
127
|
+
fix_applied: str | None,
|
|
128
|
+
) -> Experience:
|
|
129
|
+
"""Compose an Experience from a parsed REJECTED verdict + caller metadata."""
|
|
130
|
+
return Experience(
|
|
131
|
+
ts=datetime.now(timezone.utc).isoformat(),
|
|
132
|
+
agent_id=agent_id,
|
|
133
|
+
session_id=session_id,
|
|
134
|
+
context=context,
|
|
135
|
+
verdict="REJECTED",
|
|
136
|
+
blockers=parsed.blockers,
|
|
137
|
+
patterns=parsed.patterns,
|
|
138
|
+
fix_applied=fix_applied,
|
|
139
|
+
references=references or [],
|
|
140
|
+
tags=tags or [],
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def record_from_verdict(
|
|
145
|
+
*,
|
|
146
|
+
verdict_text: str,
|
|
147
|
+
agent_id: str,
|
|
148
|
+
session_id: str,
|
|
149
|
+
context: str,
|
|
150
|
+
references: list[str] | None = None,
|
|
151
|
+
tags: list[str] | None = None,
|
|
152
|
+
fix_applied: str | None = None,
|
|
153
|
+
) -> Experience | None:
|
|
154
|
+
"""Parse `verdict_text` and append one Experience to `agent_id`'s log.
|
|
155
|
+
|
|
156
|
+
Returns the persisted Experience, or None when the verdict is not
|
|
157
|
+
REJECTED (APPROVED + UNKNOWN are not lessons worth recording).
|
|
158
|
+
"""
|
|
159
|
+
parsed = parse_cqo_verdict(verdict_text)
|
|
160
|
+
if parsed.verdict != "REJECTED":
|
|
161
|
+
return None
|
|
162
|
+
experience = _build_experience(
|
|
163
|
+
parsed,
|
|
164
|
+
agent_id=agent_id,
|
|
165
|
+
session_id=session_id,
|
|
166
|
+
context=context,
|
|
167
|
+
references=references,
|
|
168
|
+
tags=tags,
|
|
169
|
+
fix_applied=fix_applied,
|
|
170
|
+
)
|
|
171
|
+
record_experience(experience)
|
|
172
|
+
return experience
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Synapse layer L2.6 — Agent Experience injection.
|
|
2
|
+
|
|
3
|
+
When the user prompt contains `[arka:dispatch] <from> -> <target>`, this
|
|
4
|
+
layer queries `core.governance.agent_experiences` for the target agent's
|
|
5
|
+
recent experiences (REJECTED verdicts, lessons captured by the QG loop)
|
|
6
|
+
and injects them as context so the dispatched specialist inherits prior
|
|
7
|
+
failures across sessions.
|
|
8
|
+
|
|
9
|
+
Designed as a standalone `Layer` subclass. As of v3.74.1 it is registered
|
|
10
|
+
by `create_default_engine` in `core/synapse/engine.py`; callers (the
|
|
11
|
+
UserPromptSubmit hook, or a manual dispatch wrapper) may still invoke `compute()` directly.
|
|
12
|
+
|
|
13
|
+
Cache TTL: 30s. The experience file is appended-to, not rewritten, so a
|
|
14
|
+
short TTL keeps newly-recorded lessons visible to the immediately-next
|
|
15
|
+
dispatch.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import re
|
|
21
|
+
import time
|
|
22
|
+
|
|
23
|
+
from core.governance.agent_experiences import Experience, query_experiences
|
|
24
|
+
from core.synapse.layers import Layer, LayerResult, PromptContext
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Mirror the parser in core.workflow.specialist_enforcer so we recognise
|
|
28
|
+
# the same marker the operator (and the constitution rule
|
|
29
|
+
# `dispatch-must-be-announced`) require for specialist dispatches.
|
|
30
|
+
_DISPATCH_RE = re.compile(
|
|
31
|
+
r"\[arka:dispatch\]\s*[\w-]+\s*->\s*([\w-]+)", re.IGNORECASE
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class AgentExperiencesLayer(Layer):
|
|
36
|
+
"""L2.6 — inject recent experiences for the dispatched specialist."""
|
|
37
|
+
|
|
38
|
+
def __init__(self, limit: int = 5) -> None:
|
|
39
|
+
self._limit = limit
|
|
40
|
+
|
|
41
|
+
@property
|
|
42
|
+
def id(self) -> str:
|
|
43
|
+
return "L2.6"
|
|
44
|
+
|
|
45
|
+
@property
|
|
46
|
+
def name(self) -> str:
|
|
47
|
+
return "AgentExperiences"
|
|
48
|
+
|
|
49
|
+
@property
|
|
50
|
+
def cache_ttl(self) -> int:
|
|
51
|
+
return 30
|
|
52
|
+
|
|
53
|
+
@property
|
|
54
|
+
def priority(self) -> int:
|
|
55
|
+
return 25 # after AgentLayer (L2 prio 20), before KBContext (L2.5)
|
|
56
|
+
|
|
57
|
+
def compute(self, ctx: PromptContext) -> LayerResult:
|
|
58
|
+
start = time.time()
|
|
59
|
+
target = _extract_dispatch_target(ctx.user_input)
|
|
60
|
+
if target is None:
|
|
61
|
+
return self._empty_result(start)
|
|
62
|
+
|
|
63
|
+
experiences = query_experiences(target, limit=self._limit)
|
|
64
|
+
if not experiences:
|
|
65
|
+
return self._empty_result(start, tag=f"[agent-experiences:{target} none]")
|
|
66
|
+
|
|
67
|
+
content = format_experiences(target, experiences)
|
|
68
|
+
ms = int((time.time() - start) * 1000)
|
|
69
|
+
return LayerResult(
|
|
70
|
+
layer_id=self.id,
|
|
71
|
+
tag=f"[agent-experiences:{target} count:{len(experiences)}]",
|
|
72
|
+
content=content,
|
|
73
|
+
tokens_est=max(1, len(content) // 4),
|
|
74
|
+
compute_ms=ms,
|
|
75
|
+
cached=False,
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
def _empty_result(self, start: float, tag: str = "") -> LayerResult:
|
|
79
|
+
return LayerResult(
|
|
80
|
+
layer_id=self.id,
|
|
81
|
+
tag=tag,
|
|
82
|
+
content="",
|
|
83
|
+
tokens_est=0,
|
|
84
|
+
compute_ms=int((time.time() - start) * 1000),
|
|
85
|
+
cached=False,
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _extract_dispatch_target(user_input: str) -> str | None:
|
|
90
|
+
"""Return the agent id from the most recent `[arka:dispatch]` marker."""
|
|
91
|
+
if not user_input:
|
|
92
|
+
return None
|
|
93
|
+
matches = list(_DISPATCH_RE.finditer(user_input))
|
|
94
|
+
if not matches:
|
|
95
|
+
return None
|
|
96
|
+
return matches[-1].group(1).lower()
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def format_experiences(target: str, experiences: list[Experience]) -> str:
|
|
100
|
+
"""Render a compact, model-readable summary of past lessons."""
|
|
101
|
+
lines = [f"Past lessons for {target} (most recent first):"]
|
|
102
|
+
for i, exp in enumerate(experiences, start=1):
|
|
103
|
+
verdict = exp.verdict or "?"
|
|
104
|
+
context = exp.context or "(no context)"
|
|
105
|
+
head = f" {i}. [{verdict}] {context}"
|
|
106
|
+
if exp.patterns:
|
|
107
|
+
head += f" — patterns: {', '.join(exp.patterns)}"
|
|
108
|
+
lines.append(head)
|
|
109
|
+
for blocker in (exp.blockers or [])[:3]:
|
|
110
|
+
lines.append(f" - {blocker}")
|
|
111
|
+
if exp.fix_applied:
|
|
112
|
+
lines.append(f" fix: {exp.fix_applied}")
|
|
113
|
+
if exp.references:
|
|
114
|
+
refs = ", ".join(exp.references[:2])
|
|
115
|
+
lines.append(f" refs: {refs}")
|
|
116
|
+
lines.append("Apply these lessons proactively. Do not repeat the rejected patterns.")
|
|
117
|
+
return "\n".join(lines)
|
package/core/synapse/engine.py
CHANGED
|
@@ -187,6 +187,7 @@ def create_default_engine(
|
|
|
187
187
|
ForgeContextLayer,
|
|
188
188
|
SessionContextLayer,
|
|
189
189
|
)
|
|
190
|
+
from core.synapse.agent_experiences_layer import AgentExperiencesLayer
|
|
190
191
|
|
|
191
192
|
engine = SynapseEngine()
|
|
192
193
|
|
|
@@ -194,6 +195,10 @@ def create_default_engine(
|
|
|
194
195
|
engine.register_layer(l0)
|
|
195
196
|
engine.register_layer(DepartmentLayer())
|
|
196
197
|
engine.register_layer(AgentLayer(agents_registry=agents_registry))
|
|
198
|
+
# L2.6 (PR3.5 v3.74.1) — injects past Quality Gate experiences for the
|
|
199
|
+
# specialist named in `[arka:dispatch]`, so dispatched agents inherit
|
|
200
|
+
# prior REJECTED lessons across sessions. Closes the PR3 loop.
|
|
201
|
+
engine.register_layer(AgentExperiencesLayer())
|
|
197
202
|
if vector_store is not None or kb_vault_path:
|
|
198
203
|
engine.register_layer(
|
|
199
204
|
KBContextLayer(
|
package/installer/cli.js
CHANGED
|
@@ -21,6 +21,10 @@ const { values, positionals } = parseArgs({
|
|
|
21
21
|
force: { type: "boolean", short: "f" },
|
|
22
22
|
"no-system": { type: "boolean" },
|
|
23
23
|
"with-ollama": { type: "boolean" },
|
|
24
|
+
// PR3.5 v3.74.1 — declared so `npx arkaos doctor --fix` lands in
|
|
25
|
+
// `values.fix` rather than as a free positional under strict:false.
|
|
26
|
+
// Eliminates the dead-branch fallback flagged by Marta in PR2's QG.
|
|
27
|
+
fix: { type: "boolean" },
|
|
24
28
|
},
|
|
25
29
|
allowPositionals: true,
|
|
26
30
|
strict: false,
|
|
@@ -94,8 +98,7 @@ async function main() {
|
|
|
94
98
|
|
|
95
99
|
case "doctor": {
|
|
96
100
|
const { doctor } = await import("./doctor.js");
|
|
97
|
-
|
|
98
|
-
await doctor({ fix: fixMode });
|
|
101
|
+
await doctor({ fix: values.fix === true });
|
|
99
102
|
break;
|
|
100
103
|
}
|
|
101
104
|
|
package/package.json
CHANGED