@event4u/agent-config 2.10.0 → 2.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/agents.md +1 -0
- package/.agent-src/commands/challenge-me.md +1 -0
- package/.agent-src/commands/chat-history.md +1 -0
- package/.agent-src/commands/context.md +1 -0
- package/.agent-src/commands/council.md +1 -0
- package/.agent-src/commands/feature.md +1 -0
- package/.agent-src/commands/fix.md +1 -0
- package/.agent-src/commands/grill-me.md +1 -0
- package/.agent-src/commands/judge.md +1 -0
- package/.agent-src/commands/memory.md +1 -0
- package/.agent-src/commands/module.md +1 -0
- package/.agent-src/commands/onboard.md +32 -4
- package/.agent-src/commands/optimize.md +1 -0
- package/.agent-src/commands/override.md +1 -0
- package/.agent-src/commands/roadmap.md +1 -0
- package/.agent-src/commands/tests.md +1 -0
- package/.agent-src/skills/canvas-design/SKILL.md +132 -0
- package/.agent-src/skills/canvas-design/evals/triggers.json +16 -0
- package/.agent-src/skills/doc-coauthoring/SKILL.md +129 -0
- package/.agent-src/skills/doc-coauthoring/evals/triggers.json +16 -0
- package/.agent-src/skills/nextjs-patterns/SKILL.md +203 -0
- package/.agent-src/skills/skill-writing/SKILL.md +101 -16
- package/.agent-src/skills/sql-writing/SKILL.md +1 -1
- package/.agent-src/skills/symfony-workflow/SKILL.md +173 -0
- package/.agent-src/templates/scripts/work_engine/hook_bootstrap.py +4 -0
- package/.agent-src/templates/scripts/work_engine/hooks/builtin/__init__.py +3 -0
- package/.agent-src/templates/scripts/work_engine/hooks/builtin/decision_gate.py +162 -0
- package/.agent-src/templates/scripts/work_engine/hooks/settings.py +24 -6
- package/.agent-src/templates/scripts/work_engine/scoring/decision_engine.py +351 -0
- package/.claude-plugin/marketplace.json +5 -1
- package/CHANGELOG.md +68 -0
- package/README.md +37 -8
- package/config/agent-settings.template.yml +66 -0
- package/docs/architecture.md +1 -1
- package/docs/contracts/STABILITY.md +16 -0
- package/docs/contracts/adr-chat-history-split.md +1 -0
- package/docs/contracts/adr-forecast-construction-shape.md +1 -0
- package/docs/contracts/adr-gtm-context-spine.md +1 -0
- package/docs/contracts/adr-level-6-productization.md +147 -0
- package/docs/contracts/adr-settings-sync-engine.md +1 -0
- package/docs/contracts/adr-wing4-context-spine.md +1 -0
- package/docs/contracts/agent-memory-contract.md +1 -0
- package/docs/contracts/agents-md-tech-stack.md +1 -0
- package/docs/contracts/audit-log-v1.md +1 -0
- package/docs/contracts/command-clusters.md +1 -0
- package/docs/contracts/command-surface-tiers.md +1 -0
- package/docs/contracts/context-paths.md +1 -0
- package/docs/contracts/cost-profile-defaults.md +105 -0
- package/docs/contracts/cross-wing-handoff.md +1 -0
- package/docs/contracts/decision-engine-gates.md +115 -0
- package/docs/contracts/decision-trace-v1.md +1 -0
- package/docs/contracts/file-ownership-matrix.md +1 -0
- package/docs/contracts/hook-architecture-v1.md +1 -0
- package/docs/contracts/implement-ticket-flow.md +1 -0
- package/docs/contracts/installed-tools-lockfile.md +1 -0
- package/docs/contracts/kernel-membership.md +1 -0
- package/docs/contracts/linear-ai-rules-inclusion.md +1 -0
- package/docs/contracts/linear-ai-three-layers.md +1 -0
- package/docs/contracts/linter-structural-model.md +1 -0
- package/docs/contracts/load-context-budget-model.md +1 -0
- package/docs/contracts/load-context-schema.md +1 -0
- package/docs/contracts/memory-visibility-v1.md +1 -0
- package/docs/contracts/one-off-script-lifecycle.md +1 -0
- package/docs/contracts/orchestration-dsl-v1.md +1 -0
- package/docs/contracts/package-self-orientation.md +1 -0
- package/docs/contracts/persona-schema.md +1 -0
- package/docs/contracts/release-trunk-sync.md +104 -0
- package/docs/contracts/roadmap-complexity-standard.md +1 -0
- package/docs/contracts/rule-classification.md +1 -0
- package/docs/contracts/rule-interactions.md +26 -0
- package/docs/contracts/rule-priority-hierarchy.md +1 -0
- package/docs/contracts/rule-router.md +1 -0
- package/docs/contracts/settings-sync-yaml-subset.md +1 -0
- package/docs/contracts/skill-domains.md +1 -0
- package/docs/contracts/tier-3-contrib-plugin.md +1 -0
- package/docs/contracts/ui-stack-extension.md +1 -0
- package/docs/contracts/ui-track-flow.md +1 -0
- package/docs/customization.md +1 -1
- package/docs/getting-started.md +3 -1
- package/docs/installation.md +8 -6
- package/package.json +1 -1
- package/scripts/ai_council/clients.py +17 -4
- package/scripts/ai_council/orchestrator.py +6 -2
- package/scripts/check_beta_review_markers.py +127 -0
- package/scripts/check_references.py +25 -0
- package/scripts/check_release_trunk_sync.py +152 -0
- package/scripts/council_cli.py +36 -5
- package/scripts/install.py +3 -3
- package/scripts/run_skill_evals.py +185 -0
- package/scripts/schemas/command.schema.json +5 -0
- package/scripts/schemas/skill.schema.json +4 -0
- package/scripts/skill_linter.py +82 -3
- package/scripts/smoke_quickstart.py +134 -0
- package/scripts/validate_decision_engine.py +124 -0
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Beta-review-marker checker for `docs/contracts/`.
|
|
4
|
+
|
|
5
|
+
Every contract whose frontmatter declares `stability: beta` MUST carry
|
|
6
|
+
exactly one of the following frontmatter markers (per
|
|
7
|
+
`docs/contracts/STABILITY.md` § Beta-review markers, ratified in
|
|
8
|
+
`road-to-productization.md` § P5.4):
|
|
9
|
+
|
|
10
|
+
- `promote-to: stable`
|
|
11
|
+
- `keep-beta-until: YYYY-MM-DD` (max 90 days from the last review)
|
|
12
|
+
- `superseded-by: <contract-id>`
|
|
13
|
+
|
|
14
|
+
Exit codes: 0 = clean, 1 = violations found, 3 = internal error.
|
|
15
|
+
|
|
16
|
+
Usage:
|
|
17
|
+
python3 scripts/check_beta_review_markers.py
|
|
18
|
+
python3 scripts/check_beta_review_markers.py --json
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import argparse
|
|
24
|
+
import json
|
|
25
|
+
import re
|
|
26
|
+
import sys
|
|
27
|
+
from dataclasses import asdict, dataclass
|
|
28
|
+
from datetime import date, timedelta
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
|
|
31
|
+
ROOT = Path(__file__).resolve().parent.parent
|
|
32
|
+
CONTRACTS_DIR = Path("docs/contracts")
|
|
33
|
+
|
|
34
|
+
FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL)
|
|
35
|
+
STABILITY_RE = re.compile(r"^stability:\s*(\w+)\s*$", re.MULTILINE)
|
|
36
|
+
PROMOTE_RE = re.compile(r"^promote-to:\s*stable\s*$", re.MULTILINE)
|
|
37
|
+
KEEP_RE = re.compile(r"^keep-beta-until:\s*(\d{4}-\d{2}-\d{2})\s*$", re.MULTILINE)
|
|
38
|
+
SUPERSEDED_RE = re.compile(r"^superseded-by:\s*\S+\s*$", re.MULTILINE)
|
|
39
|
+
|
|
40
|
+
MAX_REVIEW_WINDOW_DAYS = 90
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class Violation:
    """A single beta-review-marker finding for one contract file."""

    # Contract path as shown to the user (check_one stores it relative to ROOT).
    file: str
    # Human-readable description of what is wrong with the file.
    reason: str
    # "error" | "warning" — main() only fails the run on "error".
    severity: str
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def read_frontmatter(path: Path) -> str | None:
    """Return the frontmatter text of *path*, or ``None`` if there is none.

    ``None`` covers two cases: the file does not exist, or its content does
    not begin with a block matched by ``FRONTMATTER_RE``. The returned text
    is the regex's first group, i.e. the content between the ``---`` fences.
    """
    if path.exists():
        found = FRONTMATTER_RE.match(path.read_text(encoding="utf-8"))
        if found:
            return found.group(1)
    return None
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def check_one(path: Path, today: date) -> list[Violation]:
    """Validate the beta-review marker of a single contract file.

    Files without frontmatter, or whose ``stability`` is not ``beta``, are
    out of scope and yield no violations. A beta contract must carry exactly
    one of ``promote-to``, ``keep-beta-until`` or ``superseded-by``.

    Args:
        path: Contract file to inspect; must live under ``ROOT`` because the
            reported file name is made relative to it.
        today: Reference date for the ``keep-beta-until`` window check.

    Returns:
        A list with at most one ``Violation`` (always severity ``"error"``),
        or an empty list when the file is clean or out of scope.
    """
    fm = read_frontmatter(path)
    if fm is None:
        return []
    sm = STABILITY_RE.search(fm)
    if not sm or sm.group(1) != "beta":
        return []

    km = KEEP_RE.search(fm)
    markers = [
        ("promote-to", bool(PROMOTE_RE.search(fm))),
        ("keep-beta-until", km is not None),
        ("superseded-by", bool(SUPERSEDED_RE.search(fm))),
    ]
    set_markers = [name for name, present in markers if present]
    rel = str(path.relative_to(ROOT))

    if not set_markers:
        return [Violation(
            file=rel,
            reason="stability=beta but no review marker; add one of "
                   "`promote-to: stable` | `keep-beta-until: <date>` | "
                   "`superseded-by: <id>` (see STABILITY.md § Beta-review markers)",
            severity="error",
        )]
    if len(set_markers) > 1:
        return [Violation(
            file=rel,
            reason=f"multiple beta-review markers set ({', '.join(set_markers)}); "
                   "exactly one is allowed",
            severity="error",
        )]

    if km:
        raw_date = km.group(1)
        try:
            review_date = date.fromisoformat(raw_date)
        except ValueError:
            # KEEP_RE only checks the digit layout, so values such as
            # 2025-02-30 get here. Report them against the file instead of
            # letting the whole run die as an "internal error".
            return [Violation(
                file=rel,
                reason=f"keep-beta-until={raw_date} is not a valid calendar "
                       "date (expected YYYY-MM-DD)",
                severity="error",
            )]
        max_date = today + timedelta(days=MAX_REVIEW_WINDOW_DAYS)
        if review_date > max_date:
            return [Violation(
                file=rel,
                reason=f"keep-beta-until={review_date} exceeds the "
                       f"{MAX_REVIEW_WINDOW_DAYS}-day window (max: {max_date})",
                severity="error",
            )]
    return []
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def main() -> int:
    """Check every ``*.md`` contract and report beta-review-marker violations.

    With ``--json`` the findings are printed as a JSON document; otherwise a
    human-readable listing is printed.

    Returns:
        1 if at least one finding has severity ``"error"``, else 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--json", action="store_true", help="machine-readable output")
    opts = parser.parse_args()

    now = date.today()
    found: list[Violation] = [
        item
        for contract in sorted((ROOT / CONTRACTS_DIR).glob("*.md"))
        for item in check_one(contract, now)
    ]

    if opts.json:
        payload = {"violations": [asdict(item) for item in found]}
        print(json.dumps(payload, indent=2))
    elif not found:
        print("✅ All beta contracts carry a valid review marker.")
    else:
        for item in found:
            icon = "❌" if item.severity == "error" else "⚠️ "
            print(f"{icon} {item.file}: {item.reason}")
        print(f"\n{len(found)} violation(s).")

    has_error = any(item.severity == "error" for item in found)
    return 1 if has_error else 0
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
if __name__ == "__main__":
    # Script entry point: any unexpected exception becomes exit code 3,
    # the "internal error" code documented in the module docstring.
    try:
        status = main()
    except Exception as exc:  # pragma: no cover
        print(f"internal error: {exc}", file=sys.stderr)
        status = 3
    sys.exit(status)
|
|
@@ -39,6 +39,17 @@ SKIP_DIRS = [
|
|
|
39
39
|
"agents/council-questions", # design Q&A trail — forward-refs to planned artifacts
|
|
40
40
|
"agents/analysis", # plate-comparison working docs — forward-refs to planned artifacts
|
|
41
41
|
]
|
|
42
|
+
|
|
43
|
+
# Per-file opt-out marker. When present in the first 10 lines of a .md
|
|
44
|
+
# file, the entire file is skipped. Use for working docs that
|
|
45
|
+
# intentionally reference planned-but-not-yet-existing artifacts
|
|
46
|
+
# (audit bundles, design Q&A, in-flight plans).
|
|
47
|
+
FILE_SKIP_MARKER = "<!-- check-refs: skip -->"
|
|
48
|
+
|
|
49
|
+
# Per-line opt-out marker. When present anywhere on a line, that line's
|
|
50
|
+
# refs are skipped. Use for isolated forward-refs inside otherwise
|
|
51
|
+
# fully-checked documents.
|
|
52
|
+
LINE_IGNORE_MARKER = "<!-- ref-ignore -->"
|
|
42
53
|
ROOT = Path(".")
|
|
43
54
|
|
|
44
55
|
# YAML memory files (engineering-memory layer) live under `agents/memory/`.
|
|
@@ -219,6 +230,14 @@ def check_file(filepath: Path, artifacts: dict[str, set[str]], root: Path) -> Li
|
|
|
219
230
|
except Exception:
|
|
220
231
|
return broken
|
|
221
232
|
|
|
233
|
+
# File-level opt-out: working docs that intentionally reference
|
|
234
|
+
# planned-but-not-yet-existing artifacts mark themselves with
|
|
235
|
+
# `<!-- check-refs: skip -->` in the first 10 lines. Marker pairs
|
|
236
|
+
# with the per-line `<!-- ref-ignore -->` below; either suffices.
|
|
237
|
+
header_lines = text.splitlines()[:10]
|
|
238
|
+
if any(FILE_SKIP_MARKER in line for line in header_lines):
|
|
239
|
+
return broken
|
|
240
|
+
|
|
222
241
|
# Validate `personas:` frontmatter entries against known persona ids.
|
|
223
242
|
for line_no, pid in _extract_personas_frontmatter(text):
|
|
224
243
|
if pid not in artifacts["personas"]:
|
|
@@ -241,6 +260,12 @@ def check_file(filepath: Path, artifacts: dict[str, set[str]], root: Path) -> Li
|
|
|
241
260
|
if in_code_block:
|
|
242
261
|
continue
|
|
243
262
|
|
|
263
|
+
# Per-line opt-out: isolated forward-refs in otherwise checked
|
|
264
|
+
# documents (e.g. one ref to a planned skill, surrounded by
|
|
265
|
+
# valid refs). Skip the whole line's path / skill / rule checks.
|
|
266
|
+
if LINE_IGNORE_MARKER in line:
|
|
267
|
+
continue
|
|
268
|
+
|
|
244
269
|
# Unchecked TODO checkboxes document future work — their refs are
|
|
245
270
|
# forward-looking and will not resolve yet. Track multi-line bullets:
|
|
246
271
|
# any `- [ ]` opens a TODO context; a new top-level bullet, heading,
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Release-trunk-sync CI gate (road-to-productization P1.3).
|
|
4
|
+
|
|
5
|
+
Fails if `main` is more than one tagged release behind the current
|
|
6
|
+
release-prep branch's target version. No-ops on every other branch
|
|
7
|
+
class. Owner contract: `docs/contracts/release-trunk-sync.md`.
|
|
8
|
+
|
|
9
|
+
Exit codes: 0 = pass / no-op, 1 = main is too far behind, 3 = internal
|
|
10
|
+
error (git unavailable, malformed tag, etc.).
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
import re
|
|
17
|
+
import subprocess
|
|
18
|
+
import sys
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
RELEASE_BRANCH_RE = re.compile(r"^release/(\d+)\.(\d+)\.(\d+)$")
|
|
22
|
+
SEMVER_TAG_RE = re.compile(r"^(\d+)\.(\d+)\.(\d+)$")
|
|
23
|
+
BOOTSTRAP_FILE = Path("docs/contracts/release-trunk-sync.bootstrap")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _git(*args: str) -> str:
    """Run ``git <args>`` and return its stdout with surrounding whitespace removed.

    A non-zero exit status is not raised; it yields an empty string, so
    callers treat "" as "no answer".
    """
    result = subprocess.run(
        ["git", *args], capture_output=True, text=True, check=False
    )
    return result.stdout.strip() if result.returncode == 0 else ""
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _current_branch() -> str:
    """Return the output of ``git rev-parse --abbrev-ref HEAD`` ("" on failure)."""
    branch_name = _git("rev-parse", "--abbrev-ref", "HEAD")
    return branch_name
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _parse_semver(text: str) -> tuple[int, int, int] | None:
    """Parse *text* as a bare ``MAJOR.MINOR.PATCH`` tag.

    Returns the three components as integers, or ``None`` when *text* does
    not match ``SEMVER_TAG_RE``.
    """
    matched = SEMVER_TAG_RE.match(text)
    if matched is None:
        return None
    major, minor, patch = (int(part) for part in matched.groups())
    return major, minor, patch
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _all_tags() -> list[tuple[int, int, int]]:
    """Return every semver-shaped git tag as a version tuple, sorted ascending.

    Tags that ``_parse_semver`` does not recognise are ignored.
    """
    listing = _git("tag", "--list")
    candidates = (_parse_semver(entry.strip()) for entry in listing.splitlines())
    return sorted(version for version in candidates if version is not None)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _main_tag() -> tuple[int, int, int] | None:
    """Highest semver tag whose commit is reachable from main.

    Returns ``None`` when neither the local nor the remote-tracking main ref
    resolves, or when no semver tag is merged into it.
    """
    # Prefer the local main branch; fall back to origin/main.
    head = ""
    for ref in ("refs/heads/main", "refs/remotes/origin/main"):
        head = _git("rev-parse", "--verify", ref)
        if head:
            break
    if not head:
        return None

    # `git tag --merged <commit>` lists tags reachable from that commit.
    listing = _git("tag", "--merged", head)
    parsed = (_parse_semver(entry.strip()) for entry in listing.splitlines())
    return max((version for version in parsed if version is not None), default=None)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _prior_release(
|
|
79
|
+
target: tuple[int, int, int], tags: list[tuple[int, int, int]]
|
|
80
|
+
) -> tuple[int, int, int] | None:
|
|
81
|
+
earlier = [t for t in tags if t < target]
|
|
82
|
+
return max(earlier) if earlier else None
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _bootstrap_ok(target: tuple[int, int, int]) -> bool:
    """Report whether *target* is listed in the bootstrap suppression file.

    The file at ``BOOTSTRAP_FILE`` is read line by line; blank lines and
    lines starting with ``#`` are ignored, and a remaining line must equal
    ``MAJOR.MINOR.PATCH`` of *target* exactly.

    Returns:
        ``True`` if the version is listed, ``False`` if it is not or the
        file does not exist.
    """
    if not BOOTSTRAP_FILE.exists():
        return False
    wanted = "{0}.{1}.{2}".format(*target)
    # Decode explicitly as UTF-8 so the result does not depend on the
    # runner's locale.
    for raw in BOOTSTRAP_FILE.read_text(encoding="utf-8").splitlines():
        entry = raw.strip()
        if not entry or entry.startswith("#"):
            continue
        if entry == wanted:
            return True
    return False
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def main() -> int:
    """Run the release-trunk-sync gate and return its exit code.

    The gate only applies to branches matching ``RELEASE_BRANCH_RE``. It
    passes when main is at or ahead of the release target, when main is at
    least at the tagged release preceding the target, or when the target is
    listed in the bootstrap file.

    Returns:
        0 when the gate passes or does not apply, 1 when main is too far
        behind the release target.
    """
    # Resolve the branch from the CI environment first. On CI runs that
    # check out a merge ref the work tree is detached, so git reports
    # "HEAD"; asking git first would skip the gate before the CI-provided
    # branch name was ever consulted.
    ci_ref = os.environ.get("GITHUB_HEAD_REF") or os.environ.get(
        "GITHUB_REF_NAME"
    )
    branch = ci_ref or _current_branch()
    if branch == "HEAD" or not branch:
        print("::warning::detached HEAD — release-trunk-sync gate skipped")
        return 0

    m = RELEASE_BRANCH_RE.match(branch)
    if not m:
        return 0  # non-release branch class — gate is a no-op
    target = (int(m.group(1)), int(m.group(2)), int(m.group(3)))
    target_s = "{0}.{1}.{2}".format(*target)

    tags = _all_tags()
    if not tags:
        print(
            "::warning::no semver tags found — release-trunk-sync gate skipped"
        )
        return 0
    main_tag = _main_tag()
    if main_tag is None:
        print(
            "::warning::no semver tag reachable from main — gate skipped"
        )
        return 0
    if main_tag >= target:
        return 0  # main already at or ahead of release target
    prior = _prior_release(target, tags)
    if prior is not None and main_tag >= prior:
        return 0  # within the one-release tolerance
    if _bootstrap_ok(target):
        print(
            f"::warning::release-trunk-sync gate suppressed for {target_s} "
            "via bootstrap file"
        )
        return 0

    main_s = "{0}.{1}.{2}".format(*main_tag)
    print(
        f"::error::main is at {main_s}; release-prep branch targets "
        f"{target_s}. Main must be no more than one tagged release behind. "
        "See docs/contracts/release-trunk-sync.md."
    )
    return 1
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
if __name__ == "__main__":
    # Script entry point: unexpected exceptions are reported as a workflow
    # error annotation and mapped to exit code 3 (internal error).
    try:
        status = main()
    except Exception as exc:  # noqa: BLE001
        print(f"::error::release-trunk-sync gate internal error: {exc}")
        status = 3
    sys.exit(status)
|
package/scripts/council_cli.py
CHANGED
|
@@ -31,6 +31,7 @@ from scripts.ai_council.bundler import ( # noqa: E402
|
|
|
31
31
|
BundleTooLarge, bundle_prompt, bundle_roadmap,
|
|
32
32
|
)
|
|
33
33
|
from scripts.ai_council.clients import ( # noqa: E402
|
|
34
|
+
DEFAULT_MAX_TOKENS, UNLIMITED_TOKENS_FALLBACK,
|
|
34
35
|
AnthropicClient, CouncilResponse, ExternalAIClient, ManualClient,
|
|
35
36
|
OpenAIClient, load_anthropic_key, load_openai_key,
|
|
36
37
|
)
|
|
@@ -236,6 +237,32 @@ def _resolve_rounds(args: argparse.Namespace, ai_cfg: dict[str, Any]) -> int:
|
|
|
236
237
|
return min_rounds
|
|
237
238
|
|
|
238
239
|
|
|
240
|
+
def _resolve_max_tokens(args: argparse.Namespace, ai_cfg: dict[str, Any]) -> int:
|
|
241
|
+
"""Resolve the per-call output budget passed to each member.
|
|
242
|
+
|
|
243
|
+
Resolution chain (highest priority first):
|
|
244
|
+
1. ``--max-tokens N`` — explicit invocation override.
|
|
245
|
+
2. ``ai_council.max_output_tokens`` — settings value (project file
|
|
246
|
+
is authoritative; this key is not user-global-mergeable).
|
|
247
|
+
3. ``DEFAULT_MAX_TOKENS`` — package fallback (2048).
|
|
248
|
+
|
|
249
|
+
A value of ``0`` at any layer means "unlimited"; it is widened to
|
|
250
|
+
``UNLIMITED_TOKENS_FALLBACK`` before reaching the SDK because
|
|
251
|
+
Anthropic rejects ``max_tokens=0``. Estimation uses the same expanded
|
|
252
|
+
value so the cost preview reflects the worst-case ceiling.
|
|
253
|
+
"""
|
|
254
|
+
cli = getattr(args, "max_tokens", None)
|
|
255
|
+
if cli is not None:
|
|
256
|
+
value = int(cli)
|
|
257
|
+
elif "max_output_tokens" in ai_cfg:
|
|
258
|
+
value = int(ai_cfg.get("max_output_tokens") or 0)
|
|
259
|
+
else:
|
|
260
|
+
value = DEFAULT_MAX_TOKENS
|
|
261
|
+
if value <= 0:
|
|
262
|
+
return UNLIMITED_TOKENS_FALLBACK
|
|
263
|
+
return value
|
|
264
|
+
|
|
265
|
+
|
|
239
266
|
def cmd_estimate(
|
|
240
267
|
args: argparse.Namespace,
|
|
241
268
|
*,
|
|
@@ -255,9 +282,10 @@ def cmd_estimate(
|
|
|
255
282
|
)
|
|
256
283
|
if table is None:
|
|
257
284
|
table = load_prices()
|
|
285
|
+
ai_cfg = (settings.get("ai_council") or {}) if isinstance(settings, dict) else {}
|
|
258
286
|
question, _ = build_question(
|
|
259
287
|
input_path=Path(args.question), input_mode=args.input_mode,
|
|
260
|
-
max_tokens=args
|
|
288
|
+
max_tokens=_resolve_max_tokens(args, ai_cfg),
|
|
261
289
|
)
|
|
262
290
|
project = detect_project_context(REPO_ROOT)
|
|
263
291
|
billable = [m for m in members if getattr(m, "billable", True)]
|
|
@@ -316,9 +344,10 @@ def cmd_run(
|
|
|
316
344
|
)
|
|
317
345
|
if table is None:
|
|
318
346
|
table = load_prices()
|
|
347
|
+
ai_cfg = (settings.get("ai_council") or {}) if isinstance(settings, dict) else {}
|
|
319
348
|
question, artefact = build_question(
|
|
320
349
|
input_path=Path(args.question), input_mode=args.input_mode,
|
|
321
|
-
max_tokens=args
|
|
350
|
+
max_tokens=_resolve_max_tokens(args, ai_cfg),
|
|
322
351
|
)
|
|
323
352
|
project = detect_project_context(REPO_ROOT)
|
|
324
353
|
billable = [m for m in members if getattr(m, "billable", True)]
|
|
@@ -337,7 +366,6 @@ def cmd_run(
|
|
|
337
366
|
)
|
|
338
367
|
return 0
|
|
339
368
|
|
|
340
|
-
ai_cfg = settings.get("ai_council") or {}
|
|
341
369
|
cost_cfg = ai_cfg.get("cost_budget") or {}
|
|
342
370
|
budget = CostBudget(
|
|
343
371
|
max_input_tokens=int(cost_cfg.get("max_input_tokens", 50_000)),
|
|
@@ -451,8 +479,11 @@ def _add_common_input_args(p: argparse.ArgumentParser) -> None:
|
|
|
451
479
|
p.add_argument("--input-mode", choices=["prompt", "roadmap"],
|
|
452
480
|
default="prompt",
|
|
453
481
|
help="How to bundle the file (default: prompt).")
|
|
454
|
-
p.add_argument("--max-tokens", type=int, default=
|
|
455
|
-
help="Per-member output budget
|
|
482
|
+
p.add_argument("--max-tokens", type=int, default=None,
|
|
483
|
+
help="Per-member output budget. Default reads "
|
|
484
|
+
"ai_council.max_output_tokens from .agent-settings.yml "
|
|
485
|
+
"(2048 if unset). 0 = unlimited (widened to the safe "
|
|
486
|
+
"provider ceiling before the SDK call).")
|
|
456
487
|
p.add_argument("--mode-override", choices=["api", "manual"], default=None,
|
|
457
488
|
help="Override every member's transport mode.")
|
|
458
489
|
p.add_argument("--model", action="append", default=None, dest="model",
|
package/scripts/install.py
CHANGED
|
@@ -12,8 +12,8 @@ format in `.agent-settings.yml`, leaves a one-shot backup as
|
|
|
12
12
|
exactly once; subsequent runs are idempotent.
|
|
13
13
|
|
|
14
14
|
Usage:
|
|
15
|
-
python3 scripts/install.py # defaults: cost_profile=
|
|
16
|
-
python3 scripts/install.py --profile=
|
|
15
|
+
python3 scripts/install.py # defaults: cost_profile=balanced
|
|
16
|
+
python3 scripts/install.py --profile=minimal # set cost_profile=minimal (kernel only)
|
|
17
17
|
python3 scripts/install.py --force # overwrite existing files
|
|
18
18
|
python3 scripts/install.py --skip-bridges # only create .agent-settings.yml
|
|
19
19
|
python3 scripts/install.py --project <dir> # override project root
|
|
@@ -42,7 +42,7 @@ try:
|
|
|
42
42
|
except ImportError: # pragma: no cover — alt sys.path layout
|
|
43
43
|
from _lib.json_pointers import build_merge_entries # type: ignore[no-redef] # noqa: PLC0415
|
|
44
44
|
|
|
45
|
-
DEFAULT_PROFILE = "
|
|
45
|
+
DEFAULT_PROFILE = "balanced"
|
|
46
46
|
SUPPORTED_PROFILES = ("minimal", "balanced", "full")
|
|
47
47
|
COST_PROFILE_PLACEHOLDER = "__COST_PROFILE__"
|
|
48
48
|
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Quantitative skill-eval orchestrator (skill-writing § 7).
|
|
3
|
+
|
|
4
|
+
Scaffolds, aggregates, and reports sub-agent eval runs for a skill.
|
|
5
|
+
|
|
6
|
+
Sub-agent SPAWNING is per-environment (Claude Code, Augment Code,
|
|
7
|
+
council) and is left as a stub `_spawn_subagent(...)` that authors
|
|
8
|
+
implement once for their environment. The rest of the loop —
|
|
9
|
+
scaffold / aggregate / report — works out of the box and reads /
|
|
10
|
+
writes JSON files in `runs/`.
|
|
11
|
+
|
|
12
|
+
Layout per skill:
|
|
13
|
+
|
|
14
|
+
.agent-src.uncompressed/skills/{name}/evals/
|
|
15
|
+
evals.json
|
|
16
|
+
runs/ # gitignored
|
|
17
|
+
{timestamp}-baseline/{scenario_id}/output.txt
|
|
18
|
+
{timestamp}-baseline/{scenario_id}/grade.json
|
|
19
|
+
{timestamp}-with-skill/{scenario_id}/output.txt
|
|
20
|
+
{timestamp}-with-skill/{scenario_id}/grade.json
|
|
21
|
+
{timestamp}-benchmark.json
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import argparse
|
|
27
|
+
import json
|
|
28
|
+
import sys
|
|
29
|
+
from datetime import datetime, timezone
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
from typing import Any
|
|
32
|
+
|
|
33
|
+
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
34
|
+
SKILLS_ROOT = REPO_ROOT / ".agent-src.uncompressed" / "skills"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _skill_dir(skill: str) -> Path:
    """Return the directory of *skill* under ``SKILLS_ROOT``.

    Terminates the process via ``sys.exit`` with an error message when the
    directory does not exist.
    """
    candidate = SKILLS_ROOT / skill
    if candidate.is_dir():
        return candidate
    sys.exit(f"error: skill {skill!r} not found at {candidate}")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _evals_dir(skill: str) -> Path:
    """Return the ``evals`` directory path inside the skill's directory."""
    skill_root = _skill_dir(skill)
    return skill_root / "evals"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _load_evals(skill: str) -> dict[str, Any]:
    """Load and parse ``evals/evals.json`` for *skill*.

    Terminates the process via ``sys.exit`` when the file is missing.
    """
    spec_path = _evals_dir(skill) / "evals.json"
    if not spec_path.exists():
        sys.exit(f"error: {spec_path} not found — create it before scaffolding")
    raw = spec_path.read_text(encoding="utf-8")
    return json.loads(raw)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _timestamp() -> str:
|
|
56
|
+
return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _spawn_subagent(prompt: str, *, load_skill: str | None) -> dict[str, Any]:
|
|
60
|
+
"""STUB — implement per environment.
|
|
61
|
+
|
|
62
|
+
Must return {"output": str, "elapsed_s": float, "tokens_in": int,
|
|
63
|
+
"tokens_out": int}. When load_skill is None, run baseline; when
|
|
64
|
+
set, load that skill into the sub-agent's context.
|
|
65
|
+
"""
|
|
66
|
+
raise NotImplementedError(
|
|
67
|
+
"implement _spawn_subagent for this environment (Claude Code, "
|
|
68
|
+
"Augment, council, ...) — see docstring contract"
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _grade_assertions(output: str, run_dir: Path, assertions: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
73
|
+
results: list[dict[str, Any]] = []
|
|
74
|
+
for a in assertions:
|
|
75
|
+
kind = a.get("kind")
|
|
76
|
+
if kind == "contains":
|
|
77
|
+
ok = a["value"] in output
|
|
78
|
+
results.append({"kind": kind, "value": a["value"], "pass": ok})
|
|
79
|
+
elif kind == "file_exists":
|
|
80
|
+
ok = (run_dir / a["path"]).exists() or Path(a["path"]).exists()
|
|
81
|
+
results.append({"kind": kind, "path": a["path"], "pass": ok})
|
|
82
|
+
elif kind == "rubric":
|
|
83
|
+
results.append({"kind": kind, "criterion": a["criterion"], "pass": None,
|
|
84
|
+
"note": "rubric grading requires sub-agent — fill in manually or via grader"})
|
|
85
|
+
else:
|
|
86
|
+
results.append({"kind": kind, "pass": False, "note": f"unknown assertion kind {kind!r}"})
|
|
87
|
+
return results
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def cmd_scaffold(skill: str) -> int:
    """Create the run directories for a new eval run of *skill*.

    For both arms (``baseline`` and ``with-skill``) and every scenario in
    ``evals.json``, a directory ``runs/<timestamp>-<arm>/<scenario id>/`` is
    created and a ``meta.json`` describing the scenario is written into it.
    Exits the process when ``evals.json`` defines no scenarios.

    Returns:
        0 on success.
    """
    scenarios = _load_evals(skill).get("scenarios", [])
    if not scenarios:
        sys.exit("error: evals.json has no scenarios")

    ts = _timestamp()
    runs_root = _evals_dir(skill) / "runs"
    for arm in ("baseline", "with-skill"):
        arm_root = runs_root / f"{ts}-{arm}"
        for scenario in scenarios:
            target = arm_root / scenario["id"]
            target.mkdir(parents=True, exist_ok=True)
            meta = {
                "skill": skill,
                "arm": arm,
                "scenario_id": scenario["id"],
                "prompt": scenario["prompt"],
                "assertions": scenario.get("assertions", []),
                "timestamp": ts,
            }
            (target / "meta.json").write_text(
                json.dumps(meta, indent=2) + "\n", encoding="utf-8"
            )

    print(f"scaffolded {len(scenarios)} scenarios × 2 arms at runs/{ts}-{{baseline,with-skill}}/")
    print(f"timestamp: {ts}")
    return 0
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def cmd_aggregate(skill: str, run: str) -> int:
    """Collect the ``grade.json`` files of one run into ``<run>-benchmark.json``.

    For every scenario in ``evals.json`` and each arm, the grade file is read
    if present; a missing file is recorded with status ``"missing"``. An arm
    counts towards its total only when it has at least one result and every
    result has ``pass`` set to ``True``.

    Returns:
        0 after the benchmark file is written and the totals are printed.
    """
    runs_root = _evals_dir(skill) / "runs"
    spec = _load_evals(skill)
    bench: dict[str, Any] = {"skill": skill, "run": run, "generated_at": _timestamp(), "scenarios": []}
    totals = {"baseline_pass": 0, "with_skill_pass": 0, "scenarios": 0}
    # Arm name -> key in `totals` that counts fully passing scenarios.
    counter_for_arm = {"baseline": "baseline_pass", "with-skill": "with_skill_pass"}

    for scenario in spec.get("scenarios", []):
        row: dict[str, Any] = {"id": scenario["id"], "arms": {}}
        for arm, counter in counter_for_arm.items():
            grade_path = runs_root / f"{run}-{arm}" / scenario["id"] / "grade.json"
            if not grade_path.exists():
                row["arms"][arm] = {"status": "missing", "pass_count": 0, "total": 0}
                continue
            grade = json.loads(grade_path.read_text(encoding="utf-8"))
            results = grade.get("results", [])
            passed = len([r for r in results if r.get("pass") is True])
            row["arms"][arm] = {
                "status": "graded",
                "pass_count": passed,
                "total": len(results),
                "elapsed_s": grade.get("elapsed_s"),
                "tokens_in": grade.get("tokens_in"),
                "tokens_out": grade.get("tokens_out"),
            }
            if results and passed == len(results):
                totals[counter] += 1
        bench["scenarios"].append(row)
        totals["scenarios"] += 1

    bench["totals"] = totals
    out = runs_root / f"{run}-benchmark.json"
    out.write_text(json.dumps(bench, indent=2) + "\n", encoding="utf-8")
    print(f"wrote {out.relative_to(REPO_ROOT)}")
    print(f" baseline pass: {totals['baseline_pass']}/{totals['scenarios']}")
    print(f" with-skill pass: {totals['with_skill_pass']}/{totals['scenarios']}")
    return 0
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def cmd_report(skill: str, run: str) -> int:
    """Print a Markdown report for an aggregated run.

    Reads ``runs/<run>-benchmark.json`` and prints one table row per
    scenario with pass counts for both arms plus the with-skill minus
    baseline difference in ``tokens_out`` and ``elapsed_s``. Missing values
    count as 0. Exits the process when the benchmark file does not exist.

    Returns:
        0 after printing.
    """
    bench_path = _evals_dir(skill) / "runs" / f"{run}-benchmark.json"
    if not bench_path.exists():
        sys.exit(f"error: {bench_path} not found — run aggregate first")
    bench = json.loads(bench_path.read_text(encoding="utf-8"))

    print(f"# Skill eval report — {skill} @ {run}\n")
    print("| Scenario | Baseline | With skill | Δ tokens_out | Δ elapsed_s |")
    print("|---|---|---|---|---|")
    for scenario in bench["scenarios"]:
        arms = scenario["arms"]
        base = arms.get("baseline", {})
        with_skill = arms.get("with-skill", {})
        base_score = f"{base.get('pass_count', 0)}/{base.get('total', 0)}"
        skill_score = f"{with_skill.get('pass_count', 0)}/{with_skill.get('total', 0)}"
        token_delta = (with_skill.get("tokens_out") or 0) - (base.get("tokens_out") or 0)
        time_delta = (with_skill.get("elapsed_s") or 0) - (base.get("elapsed_s") or 0)
        print(f"| {scenario['id']} | {base_score} | {skill_score} | {token_delta:+d} | {time_delta:+.2f} |")

    t = bench["totals"]
    print(f"\n**Totals:** baseline {t['baseline_pass']}/{t['scenarios']} · with-skill {t['with_skill_pass']}/{t['scenarios']}")
    return 0
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def main() -> int:
    """Parse the command line and dispatch to the chosen sub-command.

    Sub-commands are ``scaffold``, ``aggregate`` and ``report``; each takes
    a positional ``skill``, and the latter two also require ``--run``.

    Returns:
        The exit code of the selected command, or 1 if no command matched.
    """
    parser = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    subparsers = parser.add_subparsers(dest="cmd", required=True)
    for name in ("scaffold", "aggregate", "report"):
        command = subparsers.add_parser(name)
        command.add_argument("skill")
        if name != "scaffold":
            command.add_argument("--run", required=True, help="run timestamp (from scaffold output)")
    args = parser.parse_args()

    if args.cmd == "scaffold":
        return cmd_scaffold(args.skill)
    # The remaining commands share the (skill, run) signature.
    run_commands = {"aggregate": cmd_aggregate, "report": cmd_report}
    handler = run_commands.get(args.cmd)
    if handler is None:
        return 1
    return handler(args.skill, args.run)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the exit status.
    raise SystemExit(main())
|
|
@@ -39,6 +39,11 @@
|
|
|
39
39
|
"pattern": "^[a-z][a-z0-9-]*$",
|
|
40
40
|
"description": "Locked verb cluster this command belongs to. See docs/contracts/command-clusters.md."
|
|
41
41
|
},
|
|
42
|
+
"type": {
|
|
43
|
+
"type": "string",
|
|
44
|
+
"enum": ["orchestrator"],
|
|
45
|
+
"description": "Optional type tag. `orchestrator` marks a command that aggregates other commands / skills (cluster routers, top-level entry points) and exempts it from the `command_missing_skill_references` linter check. Omit the key for ordinary commands. See road-to-productization.md P5.3."
|
|
46
|
+
},
|
|
42
47
|
"sub": {
|
|
43
48
|
"type": "string",
|
|
44
49
|
"pattern": "^[a-z][a-z0-9-]*$",
|
|
@@ -47,6 +47,10 @@
|
|
|
47
47
|
"enum": ["senior"],
|
|
48
48
|
"description": "Optional tier marker. `senior` opts the skill into the Senior-Tier Required Structure check (Context-First lead, Related Skills, Proactive Triggers, Output Artifacts) per .agent-src.uncompressed/rules/skill-quality.md."
|
|
49
49
|
},
|
|
50
|
+
"meta_skill": {
|
|
51
|
+
"type": "boolean",
|
|
52
|
+
"description": "Opt-out of the linter's `skill_too_large` warn for skills whose purpose IS breadth (skill-writing, agent-docs-writing, skill-reviewer). Meta-skills inherently bundle multiple procedures and inline examples. Use sparingly — every meta_skill: true is a load-on-context trade-off."
|
|
53
|
+
},
|
|
50
54
|
"external_source": {
|
|
51
55
|
"type": "string",
|
|
52
56
|
"format": "uri",
|