@event4u/agent-config 2.10.0 → 2.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/.agent-src/commands/agents.md +1 -0
  2. package/.agent-src/commands/challenge-me.md +1 -0
  3. package/.agent-src/commands/chat-history.md +1 -0
  4. package/.agent-src/commands/context.md +1 -0
  5. package/.agent-src/commands/council.md +1 -0
  6. package/.agent-src/commands/feature.md +1 -0
  7. package/.agent-src/commands/fix.md +1 -0
  8. package/.agent-src/commands/grill-me.md +1 -0
  9. package/.agent-src/commands/judge.md +1 -0
  10. package/.agent-src/commands/memory.md +1 -0
  11. package/.agent-src/commands/module.md +1 -0
  12. package/.agent-src/commands/onboard.md +32 -4
  13. package/.agent-src/commands/optimize.md +1 -0
  14. package/.agent-src/commands/override.md +1 -0
  15. package/.agent-src/commands/roadmap.md +1 -0
  16. package/.agent-src/commands/tests.md +1 -0
  17. package/.agent-src/skills/canvas-design/SKILL.md +132 -0
  18. package/.agent-src/skills/canvas-design/evals/triggers.json +16 -0
  19. package/.agent-src/skills/doc-coauthoring/SKILL.md +129 -0
  20. package/.agent-src/skills/doc-coauthoring/evals/triggers.json +16 -0
  21. package/.agent-src/skills/nextjs-patterns/SKILL.md +203 -0
  22. package/.agent-src/skills/skill-writing/SKILL.md +101 -16
  23. package/.agent-src/skills/sql-writing/SKILL.md +1 -1
  24. package/.agent-src/skills/symfony-workflow/SKILL.md +173 -0
  25. package/.agent-src/templates/scripts/work_engine/hook_bootstrap.py +4 -0
  26. package/.agent-src/templates/scripts/work_engine/hooks/builtin/__init__.py +3 -0
  27. package/.agent-src/templates/scripts/work_engine/hooks/builtin/decision_gate.py +162 -0
  28. package/.agent-src/templates/scripts/work_engine/hooks/settings.py +24 -6
  29. package/.agent-src/templates/scripts/work_engine/scoring/decision_engine.py +351 -0
  30. package/.claude-plugin/marketplace.json +5 -1
  31. package/CHANGELOG.md +68 -0
  32. package/README.md +37 -8
  33. package/config/agent-settings.template.yml +66 -0
  34. package/docs/architecture.md +1 -1
  35. package/docs/contracts/STABILITY.md +16 -0
  36. package/docs/contracts/adr-chat-history-split.md +1 -0
  37. package/docs/contracts/adr-forecast-construction-shape.md +1 -0
  38. package/docs/contracts/adr-gtm-context-spine.md +1 -0
  39. package/docs/contracts/adr-level-6-productization.md +147 -0
  40. package/docs/contracts/adr-settings-sync-engine.md +1 -0
  41. package/docs/contracts/adr-wing4-context-spine.md +1 -0
  42. package/docs/contracts/agent-memory-contract.md +1 -0
  43. package/docs/contracts/agents-md-tech-stack.md +1 -0
  44. package/docs/contracts/audit-log-v1.md +1 -0
  45. package/docs/contracts/command-clusters.md +1 -0
  46. package/docs/contracts/command-surface-tiers.md +1 -0
  47. package/docs/contracts/context-paths.md +1 -0
  48. package/docs/contracts/cost-profile-defaults.md +105 -0
  49. package/docs/contracts/cross-wing-handoff.md +1 -0
  50. package/docs/contracts/decision-engine-gates.md +115 -0
  51. package/docs/contracts/decision-trace-v1.md +1 -0
  52. package/docs/contracts/file-ownership-matrix.md +1 -0
  53. package/docs/contracts/hook-architecture-v1.md +1 -0
  54. package/docs/contracts/implement-ticket-flow.md +1 -0
  55. package/docs/contracts/installed-tools-lockfile.md +1 -0
  56. package/docs/contracts/kernel-membership.md +1 -0
  57. package/docs/contracts/linear-ai-rules-inclusion.md +1 -0
  58. package/docs/contracts/linear-ai-three-layers.md +1 -0
  59. package/docs/contracts/linter-structural-model.md +1 -0
  60. package/docs/contracts/load-context-budget-model.md +1 -0
  61. package/docs/contracts/load-context-schema.md +1 -0
  62. package/docs/contracts/memory-visibility-v1.md +1 -0
  63. package/docs/contracts/one-off-script-lifecycle.md +1 -0
  64. package/docs/contracts/orchestration-dsl-v1.md +1 -0
  65. package/docs/contracts/package-self-orientation.md +1 -0
  66. package/docs/contracts/persona-schema.md +1 -0
  67. package/docs/contracts/release-trunk-sync.md +104 -0
  68. package/docs/contracts/roadmap-complexity-standard.md +1 -0
  69. package/docs/contracts/rule-classification.md +1 -0
  70. package/docs/contracts/rule-interactions.md +26 -0
  71. package/docs/contracts/rule-priority-hierarchy.md +1 -0
  72. package/docs/contracts/rule-router.md +1 -0
  73. package/docs/contracts/settings-sync-yaml-subset.md +1 -0
  74. package/docs/contracts/skill-domains.md +1 -0
  75. package/docs/contracts/tier-3-contrib-plugin.md +1 -0
  76. package/docs/contracts/ui-stack-extension.md +1 -0
  77. package/docs/contracts/ui-track-flow.md +1 -0
  78. package/docs/customization.md +1 -1
  79. package/docs/getting-started.md +3 -1
  80. package/docs/installation.md +8 -6
  81. package/package.json +1 -1
  82. package/scripts/ai_council/clients.py +17 -4
  83. package/scripts/ai_council/orchestrator.py +6 -2
  84. package/scripts/check_beta_review_markers.py +127 -0
  85. package/scripts/check_references.py +25 -0
  86. package/scripts/check_release_trunk_sync.py +152 -0
  87. package/scripts/council_cli.py +36 -5
  88. package/scripts/install.py +3 -3
  89. package/scripts/run_skill_evals.py +185 -0
  90. package/scripts/schemas/command.schema.json +5 -0
  91. package/scripts/schemas/skill.schema.json +4 -0
  92. package/scripts/skill_linter.py +82 -3
  93. package/scripts/smoke_quickstart.py +134 -0
  94. package/scripts/validate_decision_engine.py +124 -0
@@ -0,0 +1,127 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Beta-review-marker checker for `docs/contracts/`.
4
+
5
+ Every contract whose frontmatter declares `stability: beta` MUST carry
6
+ exactly one of the following frontmatter markers (per
7
+ `docs/contracts/STABILITY.md` § Beta-review markers, ratified in
8
+ `road-to-productization.md` § P5.4):
9
+
10
+ - `promote-to: stable`
11
+ - `keep-beta-until: YYYY-MM-DD` (max 90 days from the last review)
12
+ - `superseded-by: <contract-id>`
13
+
14
+ Exit codes: 0 = clean, 1 = violations found, 3 = internal error.
15
+
16
+ Usage:
17
+ python3 scripts/check_beta_review_markers.py
18
+ python3 scripts/check_beta_review_markers.py --json
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import argparse
24
+ import json
25
+ import re
26
+ import sys
27
+ from dataclasses import asdict, dataclass
28
+ from datetime import date, timedelta
29
+ from pathlib import Path
30
+
31
# Repository root: the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parent.parent
# Contracts directory, expressed relative to ROOT.
CONTRACTS_DIR = Path("docs/contracts")

# Leading `---` ... `---` block at the very start of the text; group 1 is
# the frontmatter body.
FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL)
# `stability: <word>` on a line of its own; group 1 is the stability level.
STABILITY_RE = re.compile(r"^stability:\s*(\w+)\s*$", re.MULTILINE)
# The three beta-review markers, each matched on a line of its own.
PROMOTE_RE = re.compile(r"^promote-to:\s*stable\s*$", re.MULTILINE)
# Group 1 is the `YYYY-MM-DD` text; the pattern checks shape only, not
# whether the digits form a real calendar date.
KEEP_RE = re.compile(r"^keep-beta-until:\s*(\d{4}-\d{2}-\d{2})\s*$", re.MULTILINE)
SUPERSEDED_RE = re.compile(r"^superseded-by:\s*\S+\s*$", re.MULTILINE)

# Furthest a `keep-beta-until` date may lie beyond the day the check runs.
MAX_REVIEW_WINDOW_DAYS = 90
41
+
42
+
43
@dataclass
class Violation:
    """One beta-review-marker finding for a single contract file."""

    file: str  # contract path, relative to ROOT
    reason: str  # human-readable explanation shown to the user
    severity: str  # "error" | "warning"
48
+
49
+
50
def read_frontmatter(path: Path) -> str | None:
    """Return the frontmatter body of *path*, or ``None`` when there is none.

    ``None`` covers both a missing file and a file whose text does not
    start with a block matched by ``FRONTMATTER_RE``. The file is read as
    UTF-8.
    """
    if path.exists():
        match = FRONTMATTER_RE.match(path.read_text(encoding="utf-8"))
        if match:
            return match.group(1)
    return None
56
+
57
+
58
def check_one(path: Path, today: date) -> list[Violation]:
    """Validate the beta-review marker of a single contract file.

    A file without frontmatter, or whose frontmatter does not declare
    ``stability: beta``, yields no violations. A beta contract must carry
    exactly one of ``promote-to``, ``keep-beta-until`` or ``superseded-by``;
    a ``keep-beta-until`` value must be a real calendar date no later than
    ``today + MAX_REVIEW_WINDOW_DAYS``.

    Args:
        path: Contract file to inspect. It must live under ``ROOT``;
            ``Path.relative_to`` raises ``ValueError`` otherwise.
        today: Reference date used to compute the review window.

    Returns:
        An empty list when the file is clean, otherwise a one-element list
        describing the first problem found.
    """
    fm = read_frontmatter(path)
    if fm is None:
        return []
    sm = STABILITY_RE.search(fm)
    if not sm or sm.group(1) != "beta":
        return []

    keep_match = KEEP_RE.search(fm)
    markers = [
        ("promote-to", bool(PROMOTE_RE.search(fm))),
        ("keep-beta-until", keep_match is not None),
        ("superseded-by", bool(SUPERSEDED_RE.search(fm))),
    ]
    set_markers = [name for name, present in markers if present]
    rel = str(path.relative_to(ROOT))

    if not set_markers:
        return [Violation(
            file=rel,
            reason="stability=beta but no review marker; add one of "
                   "`promote-to: stable` | `keep-beta-until: <date>` | "
                   "`superseded-by: <id>` (see STABILITY.md § Beta-review markers)",
            severity="error",
        )]
    if len(set_markers) > 1:
        return [Violation(
            file=rel,
            reason=f"multiple beta-review markers set ({', '.join(set_markers)}); "
                   "exactly one is allowed",
            severity="error",
        )]

    if keep_match:
        raw_date = keep_match.group(1)
        try:
            review_date = date.fromisoformat(raw_date)
        except ValueError:
            # KEEP_RE only checks the digit layout, so values such as
            # 2025-02-30 reach this point. Report them as a violation of
            # this file instead of aborting the whole run.
            return [Violation(
                file=rel,
                reason=f"keep-beta-until={raw_date} is not a valid calendar "
                       "date (expected YYYY-MM-DD)",
                severity="error",
            )]
        max_date = today + timedelta(days=MAX_REVIEW_WINDOW_DAYS)
        if review_date > max_date:
            return [Violation(
                file=rel,
                reason=f"keep-beta-until={review_date} exceeds the "
                       f"{MAX_REVIEW_WINDOW_DAYS}-day window (max: {max_date})",
                severity="error",
            )]
    return []
99
+
100
+
101
def main() -> int:
    """Check every ``*.md`` contract under ``ROOT / CONTRACTS_DIR``.

    With ``--json`` the findings are printed as a JSON document; otherwise
    one line per finding (or a success line) is printed.

    Returns:
        ``1`` when at least one finding has severity ``"error"``, else ``0``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--json", action="store_true", help="machine-readable output")
    opts = parser.parse_args()

    today = date.today()
    violations: list[Violation] = [
        finding
        for contract in sorted((ROOT / CONTRACTS_DIR).glob("*.md"))
        for finding in check_one(contract, today)
    ]

    if opts.json:
        payload = {"violations": [asdict(v) for v in violations]}
        print(json.dumps(payload, indent=2))
    elif not violations:
        print("✅ All beta contracts carry a valid review marker.")
    else:
        for v in violations:
            icon = "⚠️ " if v.severity != "error" else "❌"
            print(f"{icon} {v.file}: {v.reason}")
        print(f"\n{len(violations)} violation(s).")

    has_error = any(v.severity == "error" for v in violations)
    return 1 if has_error else 0
120
+
121
+
122
if __name__ == "__main__":
    # Unexpected exceptions map to exit code 3. SystemExit is not an
    # Exception subclass, so exits raised inside main() pass through.
    try:
        exit_code = main()
    except Exception as exc:  # pragma: no cover
        print(f"internal error: {exc}", file=sys.stderr)
        sys.exit(3)
    else:
        sys.exit(exit_code)
@@ -39,6 +39,17 @@ SKIP_DIRS = [
39
39
  "agents/council-questions", # design Q&A trail — forward-refs to planned artifacts
40
40
  "agents/analysis", # plate-comparison working docs — forward-refs to planned artifacts
41
41
  ]
42
+
43
+ # Per-file opt-out marker. When present in the first 10 lines of a .md
44
+ # file, the entire file is skipped. Use for working docs that
45
+ # intentionally reference planned-but-not-yet-existing artifacts
46
+ # (audit bundles, design Q&A, in-flight plans).
47
+ FILE_SKIP_MARKER = "<!-- check-refs: skip -->"
48
+
49
+ # Per-line opt-out marker. When present anywhere on a line, that line's
50
+ # refs are skipped. Use for isolated forward-refs inside otherwise
51
+ # fully-checked documents.
52
+ LINE_IGNORE_MARKER = "<!-- ref-ignore -->"
42
53
  ROOT = Path(".")
43
54
 
44
55
  # YAML memory files (engineering-memory layer) live under `agents/memory/`.
@@ -219,6 +230,14 @@ def check_file(filepath: Path, artifacts: dict[str, set[str]], root: Path) -> Li
219
230
  except Exception:
220
231
  return broken
221
232
 
233
+ # File-level opt-out: working docs that intentionally reference
234
+ # planned-but-not-yet-existing artifacts mark themselves with
235
+ # `<!-- check-refs: skip -->` in the first 10 lines. Marker pairs
236
+ # with the per-line `<!-- ref-ignore -->` below; either suffices.
237
+ header_lines = text.splitlines()[:10]
238
+ if any(FILE_SKIP_MARKER in line for line in header_lines):
239
+ return broken
240
+
222
241
  # Validate `personas:` frontmatter entries against known persona ids.
223
242
  for line_no, pid in _extract_personas_frontmatter(text):
224
243
  if pid not in artifacts["personas"]:
@@ -241,6 +260,12 @@ def check_file(filepath: Path, artifacts: dict[str, set[str]], root: Path) -> Li
241
260
  if in_code_block:
242
261
  continue
243
262
 
263
+ # Per-line opt-out: isolated forward-refs in otherwise checked
264
+ # documents (e.g. one ref to a planned skill, surrounded by
265
+ # valid refs). Skip the whole line's path / skill / rule checks.
266
+ if LINE_IGNORE_MARKER in line:
267
+ continue
268
+
244
269
  # Unchecked TODO checkboxes document future work — their refs are
245
270
  # forward-looking and will not resolve yet. Track multi-line bullets:
246
271
  # any `- [ ]` opens a TODO context; a new top-level bullet, heading,
@@ -0,0 +1,152 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Release-trunk-sync CI gate (road-to-productization P1.3).
4
+
5
+ Fails if `main` is more than one tagged release behind the current
6
+ release-prep branch's target version. No-ops on every other branch
7
+ class. Owner contract: `docs/contracts/release-trunk-sync.md`.
8
+
9
+ Exit codes: 0 = pass / no-op, 1 = main is too far behind, 3 = internal
10
+ error (git unavailable, malformed tag, etc.).
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import os
16
+ import re
17
+ import subprocess
18
+ import sys
19
+ from pathlib import Path
20
+
21
# Release-prep branch names: `release/<major>.<minor>.<patch>`.
RELEASE_BRANCH_RE = re.compile(r"^release/(\d+)\.(\d+)\.(\d+)$")
# Bare `<major>.<minor>.<patch>` tags; a prefix such as `v` does not match.
SEMVER_TAG_RE = re.compile(r"^(\d+)\.(\d+)\.(\d+)$")
# Relative path of the file listing target versions for which the gate is
# suppressed (one version per line).
BOOTSTRAP_FILE = Path("docs/contracts/release-trunk-sync.bootstrap")
24
+
25
+
26
def _git(*args: str) -> str:
    """Run ``git`` with *args* and return its stripped standard output.

    A non-zero exit status yields the empty string instead of an exception.
    """
    result = subprocess.run(
        ["git", *args], capture_output=True, text=True, check=False
    )
    return result.stdout.strip() if result.returncode == 0 else ""
33
+
34
+
35
def _current_branch() -> str:
    """Return what ``git rev-parse --abbrev-ref HEAD`` prints ("" on failure)."""
    branch_name = _git("rev-parse", "--abbrev-ref", "HEAD")
    return branch_name
37
+
38
+
39
def _parse_semver(text: str) -> tuple[int, int, int] | None:
    """Parse ``<major>.<minor>.<patch>`` into an int triple.

    Returns ``None`` when *text* does not match ``SEMVER_TAG_RE``.
    """
    match = SEMVER_TAG_RE.match(text)
    if match is None:
        return None
    major, minor, patch = (int(part) for part in match.groups())
    return major, minor, patch
44
+
45
+
46
def _all_tags() -> list[tuple[int, int, int]]:
    """Return every semver-shaped tag of the repository, sorted ascending.

    Tags that ``_parse_semver`` rejects are dropped.
    """
    candidates = (
        _parse_semver(raw_line.strip())
        for raw_line in _git("tag", "--list").splitlines()
    )
    return sorted(version for version in candidates if version is not None)
55
+
56
+
57
def _main_tag() -> tuple[int, int, int] | None:
    """Return the highest semver tag reachable from ``main``.

    The local ``main`` ref is tried first, then ``origin/main``. ``None`` is
    returned when neither ref resolves or no semver tag is reachable.
    """
    head = ""
    for ref in ("refs/heads/main", "refs/remotes/origin/main"):
        head = _git("rev-parse", "--verify", ref)
        if head:
            break
    if not head:
        return None

    # `git tag --merged <commit>` lists the tags reachable from that commit.
    parsed = (
        _parse_semver(raw_line.strip())
        for raw_line in _git("tag", "--merged", head).splitlines()
    )
    reachable = [version for version in parsed if version is not None]
    return max(reachable) if reachable else None
76
+
77
+
78
+ def _prior_release(
79
+ target: tuple[int, int, int], tags: list[tuple[int, int, int]]
80
+ ) -> tuple[int, int, int] | None:
81
+ earlier = [t for t in tags if t < target]
82
+ return max(earlier) if earlier else None
83
+
84
+
85
def _bootstrap_ok(target: tuple[int, int, int]) -> bool:
    """Tell whether *target* is listed in ``BOOTSTRAP_FILE``.

    Blank lines and lines starting with ``#`` are ignored; every other line
    is compared, after stripping, with ``<major>.<minor>.<patch>``. A missing
    file counts as "not listed".
    """
    if not BOOTSTRAP_FILE.exists():
        return False
    wanted = "{0}.{1}.{2}".format(*target)
    entries = (raw.strip() for raw in BOOTSTRAP_FILE.read_text().splitlines())
    return any(
        entry == wanted
        for entry in entries
        if entry and not entry.startswith("#")
    )
96
+
97
+
98
def main() -> int:
    """Run the release-trunk-sync gate and return the process exit code.

    The gate only acts on branches named ``release/<major>.<minor>.<patch>``.
    It passes when ``main`` already carries a tag at or above the target,
    when ``main`` carries at least the release immediately preceding the
    target, or when the target is listed in the bootstrap file.

    Returns:
        ``0`` when the gate passes or does not apply, ``1`` when ``main`` is
        more than one tagged release behind the target.
    """
    # Resolve the branch from the CI environment first. Pull-request jobs on
    # GitHub Actions check out a merge ref, which git reports as a detached
    # "HEAD"; falling back to git only when CI gives no name keeps the gate
    # active for those jobs.
    ci_ref = os.environ.get("GITHUB_HEAD_REF") or os.environ.get(
        "GITHUB_REF_NAME"
    )
    branch = ci_ref or _current_branch()
    if branch == "HEAD" or not branch:
        print("::warning::detached HEAD — release-trunk-sync gate skipped")
        return 0

    m = RELEASE_BRANCH_RE.match(branch)
    if not m:
        return 0  # non-release branch class — gate is a no-op
    target = (int(m.group(1)), int(m.group(2)), int(m.group(3)))
    target_s = "{0}.{1}.{2}".format(*target)

    tags = _all_tags()
    if not tags:
        print(
            "::warning::no semver tags found — release-trunk-sync gate skipped"
        )
        return 0
    main_tag = _main_tag()
    if main_tag is None:
        print(
            "::warning::no semver tag reachable from main — gate skipped"
        )
        return 0

    if main_tag >= target:
        return 0  # main already at or ahead of release target
    prior = _prior_release(target, tags)
    if prior is not None and main_tag >= prior:
        return 0  # within the one-release tolerance
    if _bootstrap_ok(target):
        print(
            f"::warning::release-trunk-sync gate suppressed for {target_s} "
            "via bootstrap file"
        )
        return 0

    main_s = "{0}.{1}.{2}".format(*main_tag)
    print(
        f"::error::main is at {main_s}; release-prep branch targets "
        f"{target_s}. Main must be no more than one tagged release behind. "
        "See docs/contracts/release-trunk-sync.md."
    )
    return 1
145
+
146
+
147
if __name__ == "__main__":
    # Unexpected exceptions become exit code 3 with a workflow-style error
    # line; SystemExit is not an Exception subclass and passes through.
    try:
        exit_code = main()
    except Exception as exc:  # noqa: BLE001
        print(f"::error::release-trunk-sync gate internal error: {exc}")
        sys.exit(3)
    else:
        sys.exit(exit_code)
@@ -31,6 +31,7 @@ from scripts.ai_council.bundler import ( # noqa: E402
31
31
  BundleTooLarge, bundle_prompt, bundle_roadmap,
32
32
  )
33
33
  from scripts.ai_council.clients import ( # noqa: E402
34
+ DEFAULT_MAX_TOKENS, UNLIMITED_TOKENS_FALLBACK,
34
35
  AnthropicClient, CouncilResponse, ExternalAIClient, ManualClient,
35
36
  OpenAIClient, load_anthropic_key, load_openai_key,
36
37
  )
@@ -236,6 +237,32 @@ def _resolve_rounds(args: argparse.Namespace, ai_cfg: dict[str, Any]) -> int:
236
237
  return min_rounds
237
238
 
238
239
 
240
+ def _resolve_max_tokens(args: argparse.Namespace, ai_cfg: dict[str, Any]) -> int:
241
+ """Resolve the per-call output budget passed to each member.
242
+
243
+ Resolution chain (highest priority first):
244
+ 1. ``--max-tokens N`` — explicit invocation override.
245
+ 2. ``ai_council.max_output_tokens`` — settings value (project file
246
+ is authoritative; this key is not user-global-mergeable).
247
+ 3. ``DEFAULT_MAX_TOKENS`` — package fallback (2048).
248
+
249
+ A value of ``0`` at any layer means "unlimited"; it is widened to
250
+ ``UNLIMITED_TOKENS_FALLBACK`` before reaching the SDK because
251
+ Anthropic rejects ``max_tokens=0``. Estimation uses the same expanded
252
+ value so the cost preview reflects the worst-case ceiling.
253
+ """
254
+ cli = getattr(args, "max_tokens", None)
255
+ if cli is not None:
256
+ value = int(cli)
257
+ elif "max_output_tokens" in ai_cfg:
258
+ value = int(ai_cfg.get("max_output_tokens") or 0)
259
+ else:
260
+ value = DEFAULT_MAX_TOKENS
261
+ if value <= 0:
262
+ return UNLIMITED_TOKENS_FALLBACK
263
+ return value
264
+
265
+
239
266
  def cmd_estimate(
240
267
  args: argparse.Namespace,
241
268
  *,
@@ -255,9 +282,10 @@ def cmd_estimate(
255
282
  )
256
283
  if table is None:
257
284
  table = load_prices()
285
+ ai_cfg = (settings.get("ai_council") or {}) if isinstance(settings, dict) else {}
258
286
  question, _ = build_question(
259
287
  input_path=Path(args.question), input_mode=args.input_mode,
260
- max_tokens=args.max_tokens,
288
+ max_tokens=_resolve_max_tokens(args, ai_cfg),
261
289
  )
262
290
  project = detect_project_context(REPO_ROOT)
263
291
  billable = [m for m in members if getattr(m, "billable", True)]
@@ -316,9 +344,10 @@ def cmd_run(
316
344
  )
317
345
  if table is None:
318
346
  table = load_prices()
347
+ ai_cfg = (settings.get("ai_council") or {}) if isinstance(settings, dict) else {}
319
348
  question, artefact = build_question(
320
349
  input_path=Path(args.question), input_mode=args.input_mode,
321
- max_tokens=args.max_tokens,
350
+ max_tokens=_resolve_max_tokens(args, ai_cfg),
322
351
  )
323
352
  project = detect_project_context(REPO_ROOT)
324
353
  billable = [m for m in members if getattr(m, "billable", True)]
@@ -337,7 +366,6 @@ def cmd_run(
337
366
  )
338
367
  return 0
339
368
 
340
- ai_cfg = settings.get("ai_council") or {}
341
369
  cost_cfg = ai_cfg.get("cost_budget") or {}
342
370
  budget = CostBudget(
343
371
  max_input_tokens=int(cost_cfg.get("max_input_tokens", 50_000)),
@@ -451,8 +479,11 @@ def _add_common_input_args(p: argparse.ArgumentParser) -> None:
451
479
  p.add_argument("--input-mode", choices=["prompt", "roadmap"],
452
480
  default="prompt",
453
481
  help="How to bundle the file (default: prompt).")
454
- p.add_argument("--max-tokens", type=int, default=1024,
455
- help="Per-member output budget (default: 1024).")
482
+ p.add_argument("--max-tokens", type=int, default=None,
483
+ help="Per-member output budget. Default reads "
484
+ "ai_council.max_output_tokens from .agent-settings.yml "
485
+ "(2048 if unset). 0 = unlimited (widened to the safe "
486
+ "provider ceiling before the SDK call).")
456
487
  p.add_argument("--mode-override", choices=["api", "manual"], default=None,
457
488
  help="Override every member's transport mode.")
458
489
  p.add_argument("--model", action="append", default=None, dest="model",
@@ -12,8 +12,8 @@ format in `.agent-settings.yml`, leaves a one-shot backup as
12
12
  exactly once; subsequent runs are idempotent.
13
13
 
14
14
  Usage:
15
- python3 scripts/install.py # defaults: cost_profile=minimal
16
- python3 scripts/install.py --profile=balanced # set cost_profile=balanced
15
+ python3 scripts/install.py # defaults: cost_profile=balanced
16
+ python3 scripts/install.py --profile=minimal # set cost_profile=minimal (kernel only)
17
17
  python3 scripts/install.py --force # overwrite existing files
18
18
  python3 scripts/install.py --skip-bridges # only create .agent-settings.yml
19
19
  python3 scripts/install.py --project <dir> # override project root
@@ -42,7 +42,7 @@ try:
42
42
  except ImportError: # pragma: no cover — alt sys.path layout
43
43
  from _lib.json_pointers import build_merge_entries # type: ignore[no-redef] # noqa: PLC0415
44
44
 
45
- DEFAULT_PROFILE = "minimal"
45
+ DEFAULT_PROFILE = "balanced"
46
46
  SUPPORTED_PROFILES = ("minimal", "balanced", "full")
47
47
  COST_PROFILE_PLACEHOLDER = "__COST_PROFILE__"
48
48
 
@@ -0,0 +1,185 @@
1
+ #!/usr/bin/env python3
2
+ """Quantitative skill-eval orchestrator (skill-writing § 7).
3
+
4
+ Scaffolds, aggregates, and reports sub-agent eval runs for a skill.
5
+
6
+ Sub-agent SPAWNING is per-environment (Claude Code, Augment Code,
7
+ council) and is left as a stub `_spawn_subagent(...)` that authors
8
+ implement once for their environment. The rest of the loop —
9
+ scaffold / aggregate / report — works out of the box and reads /
10
+ writes JSON files in `runs/`.
11
+
12
+ Layout per skill:
13
+
14
+ .agent-src.uncompressed/skills/{name}/evals/
15
+ evals.json
16
+ runs/ # gitignored
17
+ {timestamp}-baseline/{scenario_id}/output.txt
18
+ {timestamp}-baseline/{scenario_id}/grade.json
19
+ {timestamp}-with-skill/{scenario_id}/output.txt
20
+ {timestamp}-with-skill/{scenario_id}/grade.json
21
+ {timestamp}-benchmark.json
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import argparse
27
+ import json
28
+ import sys
29
+ from datetime import datetime, timezone
30
+ from pathlib import Path
31
+ from typing import Any
32
+
33
# Repository root: the parent of the directory that contains this script.
REPO_ROOT = Path(__file__).resolve().parent.parent
# Directory that holds one sub-directory per skill.
SKILLS_ROOT = REPO_ROOT / ".agent-src.uncompressed" / "skills"
35
+
36
+
37
def _skill_dir(skill: str) -> Path:
    """Return the directory of *skill* under ``SKILLS_ROOT``.

    Exits the process with an error message when the directory is missing.
    """
    candidate = SKILLS_ROOT / skill
    if candidate.is_dir():
        return candidate
    sys.exit(f"error: skill {skill!r} not found at {candidate}")
42
+
43
+
44
def _evals_dir(skill: str) -> Path:
    """Return the ``evals`` directory path inside the skill's directory."""
    skill_root = _skill_dir(skill)
    return skill_root / "evals"
46
+
47
+
48
def _load_evals(skill: str) -> dict[str, Any]:
    """Load and parse ``evals.json`` for *skill*.

    Exits the process with an error message when the file does not exist.
    """
    spec_path = _evals_dir(skill) / "evals.json"
    if spec_path.exists():
        return json.loads(spec_path.read_text(encoding="utf-8"))
    sys.exit(f"error: {spec_path} not found — create it before scaffolding")
53
+
54
+
55
+ def _timestamp() -> str:
56
+ return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
57
+
58
+
59
+ def _spawn_subagent(prompt: str, *, load_skill: str | None) -> dict[str, Any]:
60
+ """STUB — implement per environment.
61
+
62
+ Must return {"output": str, "elapsed_s": float, "tokens_in": int,
63
+ "tokens_out": int}. When load_skill is None, run baseline; when
64
+ set, load that skill into the sub-agent's context.
65
+ """
66
+ raise NotImplementedError(
67
+ "implement _spawn_subagent for this environment (Claude Code, "
68
+ "Augment, council, ...) — see docstring contract"
69
+ )
70
+
71
+
72
+ def _grade_assertions(output: str, run_dir: Path, assertions: list[dict[str, Any]]) -> list[dict[str, Any]]:
73
+ results: list[dict[str, Any]] = []
74
+ for a in assertions:
75
+ kind = a.get("kind")
76
+ if kind == "contains":
77
+ ok = a["value"] in output
78
+ results.append({"kind": kind, "value": a["value"], "pass": ok})
79
+ elif kind == "file_exists":
80
+ ok = (run_dir / a["path"]).exists() or Path(a["path"]).exists()
81
+ results.append({"kind": kind, "path": a["path"], "pass": ok})
82
+ elif kind == "rubric":
83
+ results.append({"kind": kind, "criterion": a["criterion"], "pass": None,
84
+ "note": "rubric grading requires sub-agent — fill in manually or via grader"})
85
+ else:
86
+ results.append({"kind": kind, "pass": False, "note": f"unknown assertion kind {kind!r}"})
87
+ return results
88
+
89
+
90
def cmd_scaffold(skill: str) -> int:
    """Create the run directories and ``meta.json`` files for a new eval run.

    For each arm (``baseline`` and ``with-skill``) and each scenario in the
    skill's ``evals.json``, a directory
    ``runs/<timestamp>-<arm>/<scenario id>/`` is created and a ``meta.json``
    describing the scenario is written into it. Exits with an error when no
    scenarios are defined.

    Returns:
        ``0`` on success.
    """
    spec = _load_evals(skill)
    scenarios = spec.get("scenarios", [])
    if not scenarios:
        sys.exit("error: evals.json has no scenarios")

    ts = _timestamp()
    runs_root = _evals_dir(skill) / "runs"
    for arm in ("baseline", "with-skill"):
        arm_root = runs_root / f"{ts}-{arm}"
        for scenario in scenarios:
            scenario_dir = arm_root / scenario["id"]
            scenario_dir.mkdir(parents=True, exist_ok=True)
            meta = {
                "skill": skill,
                "arm": arm,
                "scenario_id": scenario["id"],
                "prompt": scenario["prompt"],
                "assertions": scenario.get("assertions", []),
                "timestamp": ts,
            }
            meta_path = scenario_dir / "meta.json"
            meta_path.write_text(json.dumps(meta, indent=2) + "\n", encoding="utf-8")

    print(f"scaffolded {len(scenarios)} scenarios × 2 arms at runs/{ts}-{{baseline,with-skill}}/")
    print(f"timestamp: {ts}")
    return 0
109
+
110
+
111
def cmd_aggregate(skill: str, run: str) -> int:
    """Fold the per-scenario ``grade.json`` files of a run into one benchmark.

    For every scenario in ``evals.json`` and both arms, the matching
    ``grade.json`` is read when present; a missing file is recorded with
    status ``"missing"``. A scenario counts as passed for an arm only when
    it has at least one result and every result has ``pass`` set to
    ``True``. The benchmark is written to ``runs/<run>-benchmark.json`` and
    a short summary is printed.

    Returns:
        ``0`` on success.
    """
    runs_root = _evals_dir(skill) / "runs"
    spec = _load_evals(skill)
    bench: dict[str, Any] = {
        "skill": skill,
        "run": run,
        "generated_at": _timestamp(),
        "scenarios": [],
    }
    totals = {"baseline_pass": 0, "with_skill_pass": 0, "scenarios": 0}
    # Which totals counter each arm feeds.
    counter_for_arm = {"baseline": "baseline_pass", "with-skill": "with_skill_pass"}

    for scenario in spec.get("scenarios", []):
        row: dict[str, Any] = {"id": scenario["id"], "arms": {}}
        for arm, counter in counter_for_arm.items():
            grade_path = runs_root / f"{run}-{arm}" / scenario["id"] / "grade.json"
            if not grade_path.exists():
                row["arms"][arm] = {"status": "missing", "pass_count": 0, "total": 0}
                continue
            grade = json.loads(grade_path.read_text(encoding="utf-8"))
            results = grade.get("results", [])
            passed = len([r for r in results if r.get("pass") is True])
            row["arms"][arm] = {
                "status": "graded",
                "pass_count": passed,
                "total": len(results),
                "elapsed_s": grade.get("elapsed_s"),
                "tokens_in": grade.get("tokens_in"),
                "tokens_out": grade.get("tokens_out"),
            }
            if results and passed == len(results):
                totals[counter] += 1
        bench["scenarios"].append(row)
        totals["scenarios"] += 1

    bench["totals"] = totals
    out = runs_root / f"{run}-benchmark.json"
    out.write_text(json.dumps(bench, indent=2) + "\n", encoding="utf-8")
    print(f"wrote {out.relative_to(REPO_ROOT)}")
    print(f" baseline pass: {totals['baseline_pass']}/{totals['scenarios']}")
    print(f" with-skill pass: {totals['with_skill_pass']}/{totals['scenarios']}")
    return 0
143
+
144
+
145
def cmd_report(skill: str, run: str) -> int:
    """Print a Markdown report for an aggregated run.

    Reads ``runs/<run>-benchmark.json`` (exiting with an error when it is
    absent) and prints one table row per scenario with the pass counts of
    both arms and the with-skill minus baseline differences in
    ``tokens_out`` and ``elapsed_s``. Missing or null measurements count as
    zero.

    Returns:
        ``0`` on success.
    """
    bench_path = _evals_dir(skill) / "runs" / f"{run}-benchmark.json"
    if not bench_path.exists():
        sys.exit(f"error: {bench_path} not found — run aggregate first")
    bench = json.loads(bench_path.read_text(encoding="utf-8"))

    print(f"# Skill eval report — {skill} @ {run}\n")
    print("| Scenario | Baseline | With skill | Δ tokens_out | Δ elapsed_s |")
    print("|---|---|---|---|---|")
    for scenario in bench["scenarios"]:
        base = scenario["arms"].get("baseline", {})
        treated = scenario["arms"].get("with-skill", {})
        base_score = f"{base.get('pass_count', 0)}/{base.get('total', 0)}"
        treated_score = f"{treated.get('pass_count', 0)}/{treated.get('total', 0)}"
        token_delta = (treated.get("tokens_out") or 0) - (base.get("tokens_out") or 0)
        time_delta = (treated.get("elapsed_s") or 0) - (base.get("elapsed_s") or 0)
        print(f"| {scenario['id']} | {base_score} | {treated_score} | {token_delta:+d} | {time_delta:+.2f} |")

    totals = bench["totals"]
    baseline_pass = totals["baseline_pass"]
    scenario_count = totals["scenarios"]
    with_skill_pass = totals["with_skill_pass"]
    print(f"\n**Totals:** baseline {baseline_pass}/{scenario_count} · with-skill {with_skill_pass}/{scenario_count}")
    return 0
164
+
165
+
166
def main() -> int:
    """Parse the command line and dispatch to the chosen sub-command.

    Sub-commands are ``scaffold``, ``aggregate`` and ``report``; each takes
    a positional ``skill``, and the latter two also require ``--run``.

    Returns:
        The sub-command's return value, or ``1`` for an unrecognised command.
    """
    parser = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    subparsers = parser.add_subparsers(dest="cmd", required=True)
    for name in ("scaffold", "aggregate", "report"):
        command = subparsers.add_parser(name)
        command.add_argument("skill")
        if name == "scaffold":
            continue
        command.add_argument("--run", required=True, help="run timestamp (from scaffold output)")
    args = parser.parse_args()

    if args.cmd == "scaffold":
        return cmd_scaffold(args.skill)
    run_commands = {"aggregate": cmd_aggregate, "report": cmd_report}
    handler = run_commands.get(args.cmd)
    if handler is None:
        return 1
    return handler(args.skill, args.run)
182
+
183
+
184
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
@@ -39,6 +39,11 @@
39
39
  "pattern": "^[a-z][a-z0-9-]*$",
40
40
  "description": "Locked verb cluster this command belongs to. See docs/contracts/command-clusters.md."
41
41
  },
42
+ "type": {
43
+ "type": "string",
44
+ "enum": ["orchestrator"],
45
+ "description": "Optional type tag. `orchestrator` marks a command that aggregates other commands / skills (cluster routers, top-level entry points) and exempts it from the `command_missing_skill_references` linter check. Omit the key for ordinary commands. See road-to-productization.md P5.3."
46
+ },
42
47
  "sub": {
43
48
  "type": "string",
44
49
  "pattern": "^[a-z][a-z0-9-]*$",
@@ -47,6 +47,10 @@
47
47
  "enum": ["senior"],
48
48
  "description": "Optional tier marker. `senior` opts the skill into the Senior-Tier Required Structure check (Context-First lead, Related Skills, Proactive Triggers, Output Artifacts) per .agent-src.uncompressed/rules/skill-quality.md."
49
49
  },
50
+ "meta_skill": {
51
+ "type": "boolean",
52
+ "description": "Opt-out of the linter's `skill_too_large` warn for skills whose purpose IS breadth (skill-writing, agent-docs-writing, skill-reviewer). Meta-skills inherently bundle multiple procedures and inline examples. Use sparingly — every meta_skill: true is a load-on-context trade-off."
53
+ },
50
54
  "external_source": {
51
55
  "type": "string",
52
56
  "format": "uri",