@event4u/agent-config 2.11.0 → 2.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/.agent-src/commands/council/analysis.md +142 -0
  2. package/.agent-src/commands/council/debate.md +129 -0
  3. package/.agent-src/commands/council/default.md +8 -0
  4. package/.agent-src/commands/council/design.md +16 -12
  5. package/.agent-src/commands/council/optimize.md +16 -15
  6. package/.agent-src/commands/council/pr.md +12 -12
  7. package/.agent-src/commands/council.md +48 -2
  8. package/.agent-src/personas/advisors/contrarian.md +95 -0
  9. package/.agent-src/personas/advisors/executor.md +99 -0
  10. package/.agent-src/personas/advisors/expansionist.md +98 -0
  11. package/.agent-src/personas/advisors/first-principles.md +98 -0
  12. package/.agent-src/personas/advisors/outsider.md +102 -0
  13. package/.agent-src/rules/copilot-routing.md +19 -0
  14. package/.agent-src/rules/devcontainer-routing.md +20 -0
  15. package/.agent-src/rules/laravel-routing.md +20 -0
  16. package/.agent-src/rules/symfony-routing.md +20 -0
  17. package/.agent-src/skills/ai-council/SKILL.md +180 -2
  18. package/.agent-src/skills/canvas-design/SKILL.md +132 -0
  19. package/.agent-src/skills/canvas-design/evals/triggers.json +16 -0
  20. package/.agent-src/skills/copilot-config/SKILL.md +1 -1
  21. package/.agent-src/skills/devcontainer/SKILL.md +1 -1
  22. package/.agent-src/skills/doc-coauthoring/SKILL.md +129 -0
  23. package/.agent-src/skills/doc-coauthoring/evals/triggers.json +16 -0
  24. package/.agent-src/skills/laravel/SKILL.md +1 -1
  25. package/.agent-src/skills/project-analysis-core/SKILL.md +1 -1
  26. package/.agent-src/skills/project-analyzer/SKILL.md +1 -1
  27. package/.agent-src/skills/skill-writing/SKILL.md +101 -16
  28. package/.agent-src/skills/sql-writing/SKILL.md +1 -1
  29. package/.agent-src/skills/symfony-workflow/SKILL.md +1 -1
  30. package/.agent-src/skills/universal-project-analysis/SKILL.md +1 -1
  31. package/.agent-src/templates/agents/agent-project-settings.example.yml +1 -1
  32. package/.claude-plugin/marketplace.json +5 -1
  33. package/AGENTS.md +1 -1
  34. package/CHANGELOG.md +78 -0
  35. package/CONTRIBUTING.md +5 -0
  36. package/README.md +3 -3
  37. package/config/agent-settings.template.yml +5 -84
  38. package/docs/architecture/multi-tool-projection.md +53 -0
  39. package/docs/architecture/{compression.md → source-projection.md} +21 -3
  40. package/docs/architecture.md +6 -6
  41. package/docs/catalog.md +21 -11
  42. package/docs/contracts/adr-architectural-consensus-mechanism.md +67 -0
  43. package/docs/contracts/adr-level-6-productization.md +2 -2
  44. package/docs/contracts/ai-council-config.md +186 -0
  45. package/docs/contracts/command-clusters.md +57 -1
  46. package/docs/contracts/multi-tool-projection-fidelity.md +109 -0
  47. package/docs/getting-started.md +2 -2
  48. package/package.json +1 -1
  49. package/scripts/_archive/README.md +59 -0
  50. package/scripts/ai_council/_default_prices.py +10 -1
  51. package/scripts/ai_council/advisors.py +148 -0
  52. package/scripts/ai_council/clients.py +189 -4
  53. package/scripts/ai_council/config.py +368 -0
  54. package/scripts/ai_council/consensus.py +290 -0
  55. package/scripts/ai_council/orchestrator.py +634 -16
  56. package/scripts/ai_council/prompts.py +335 -0
  57. package/scripts/check_compressed_paths.py +6 -1
  58. package/scripts/check_references.py +25 -0
  59. package/scripts/ci_time_ratio.py +168 -0
  60. package/scripts/council_cli.py +1007 -32
  61. package/scripts/measure_projection_bytes.py +159 -0
  62. package/scripts/measure_roadmap_trajectory.py +112 -0
  63. package/scripts/probe_projection_fidelity.py +202 -0
  64. package/scripts/run_skill_evals.py +185 -0
  65. package/scripts/schemas/skill.schema.json +4 -0
  66. package/scripts/score_skill_selection.py +198 -0
  67. package/scripts/skill_collision_clusters.py +162 -0
  68. package/scripts/skill_linter.py +71 -1
  69. /package/scripts/{_backfill_skill_domains.py → _archive/_backfill_skill_domains.py} +0 -0
  70. /package/scripts/{_bootstrap_tier_frontmatter.py → _archive/_bootstrap_tier_frontmatter.py} +0 -0
  71. /package/scripts/{_p43_bodies.py → _archive/_p43_bodies.py} +0 -0
  72. /package/scripts/{_p43_compress.py → _archive/_p43_compress.py} +0 -0
  73. /package/scripts/{_p4_migrate.py → _archive/_p4_migrate.py} +0 -0
  74. /package/scripts/{_phase2_shim_helper.py → _archive/_phase2_shim_helper.py} +0 -0
  75. /package/scripts/{_pilot_council_question.py → _archive/_pilot_council_question.py} +0 -0
@@ -122,6 +122,42 @@ MUST:
122
122
  evidence in the artefact).
123
123
  """.strip()
124
124
 
125
+ ANALYSIS_MODE = """\
126
+ The artefact is a local analysis output (from a project analyzer,
127
+ audit script, or codebase scan). Critique the **analysis itself**, not
128
+ the underlying codebase. You MUST:
129
+ 1. Flag findings that are restated under different headings —
130
+ deduplicate aggressively. The downstream consumer wants a unique
131
+ Top-N, not a long list with overlap.
132
+ 2. Score the evidence quality of each finding: confirmed (the
133
+ analysis cites file:line / metric), inferred (plausible from
134
+ stated context), or speculative (no citation, vibes-only).
135
+ Speculative findings must be called out by name.
136
+ 3. Identify findings that are roadmap-ready (concrete enough to land
137
+ as a phase step) vs ones that need a discovery loop first.
138
+ 4. Propose 3–5 follow-up actions ranked by leverage — what the next
139
+ roadmap should attack first. Cite the supporting finding(s) by id
140
+ or heading.
141
+ End with: a Top-N consensus list (one bullet per finding the
142
+ analysis surfaces) plus a single sentence on the strongest blind
143
+ spot the analysis itself has.
144
+ """.strip()
145
+
146
+
147
+ DEBATE_MODE = """\
148
+ The artefact is the topic of a structured multi-round debate. You are
149
+ one of several independent reviewers. Round-specific instructions:
150
+ 1. Round 1 — state your strongest, most defensible position on the
151
+ topic. Argue from evidence and first principles. Do not hedge.
152
+ 2. Round 2+ — read the anonymised positions from the previous round.
153
+ Identify the SINGLE strongest opposing position and write a
154
+ rebuttal addressed at its strongest steel-manned form. Your task
155
+ is to find the load-bearing flaw the opposing reviewer missed —
156
+ do NOT search for common ground.
157
+ End each round with: a one-line position summary and the single
158
+ piece of evidence that would change your mind.
159
+ """.strip()
160
+
125
161
 
126
162
  _MODE_TABLE = {
127
163
  "prompt": PROMPT_MODE,
@@ -131,9 +167,185 @@ _MODE_TABLE = {
131
167
  "pr": PR_MODE,
132
168
  "design": DESIGN_MODE,
133
169
  "optimize": OPTIMIZE_MODE,
170
+ "analysis": ANALYSIS_MODE,
171
+ "debate": DEBATE_MODE,
172
+ }
173
+
174
+
175
+ # ── Consensus-scoring prompts (Phase 4 / F3) ──────────────────────────
176
+ #
177
+ # Two-step extraction + scoring round used by the analysis lens. The
178
+ # extraction pass asks each member to surface its own top findings in
179
+ # a strict JSON shape; the scoring pass asks each member to rate
180
+ # anonymised findings produced by the *other* members.
181
+ #
182
+ # Iron Law of Neutrality applies to both: the extraction prompt never
183
+ # names other reviewers, and the scoring prompt strips the source
184
+ # author by using `Finding-A` / `Finding-B` labels (see
185
+ # `consensus.anonymize_findings`).
186
+
187
+ FINDING_EXTRACTION_PROMPT = """\
188
+ You have just produced an analysis. Re-emit your top findings as a
189
+ strict JSON array suitable for downstream tooling. Each item MUST
190
+ have:
191
+
192
+ {"id": "<short-slug>", "text": "<one-sentence finding>"}
193
+
194
+ Rules:
195
+ - 3-7 findings, ordered by importance (most important first).
196
+ - `id` is a 1-3 word kebab-case slug, unique within your array.
197
+ - `text` is a single sentence, no markdown, no reviewer self-reference.
198
+ - Wrap the array in a ```json``` fenced block. No commentary outside it.
199
+ """.strip()
200
+
201
+ FINDING_SCORING_PROMPT = """\
202
+ Below are findings from other independent reviewers, presented with
203
+ neutral labels (Finding-A, Finding-B, …). Score each one on its
204
+ merits. You MUST emit a strict JSON array, one entry per finding,
205
+ in this shape:
206
+
207
+ {"finding_id": "Finding-A", "score": 1-10, "agree": true|false,
208
+ "reason": "<one-sentence justification>"}
209
+
210
+ Rules:
211
+ - `score` is an integer 1 (weak / irrelevant) to 10 (load-bearing /
212
+ must-address).
213
+ - `agree=true` means you would surface this same finding yourself;
214
+ `agree=false` means you think it is wrong, overstated, or off-topic.
215
+ - `reason` is a single sentence, no markdown.
216
+ - Wrap the array in a ```json``` fenced block. No commentary outside it.
217
+
218
+ You may not see your own findings in the list — that is by design.
219
+ """.strip()
220
+
221
+
222
+ # ── Synthesis templates (Phase 3 / F2) ────────────────────────────────
223
+ #
224
+ # Lens-aware synthesis prompts. Each entry maps a lens key onto the
225
+ # block the host agent should produce when summarising member responses.
226
+ # R4 Q4 split: decision lenses get a Karpathy-structured template;
227
+ # creative lenses (design / optimize) stay open-ended prose (empty
228
+ # string → renderer falls back to the bare "Convergence / Divergence"
229
+ # slot). Input modes (prompt / roadmap / diff / files) map onto the
230
+ # `default` decision template via `synthesis_template()`.
231
+
232
+ DEFAULT_SYNTHESIS = """\
233
+ Summarise the council using the structured shape below. Be terse,
234
+ cite reviewers by label, and refuse to invent agreement that is not
235
+ in the responses.
236
+
237
+ ### Agreement
238
+ Points that two or more reviewers converged on, each as a single line.
239
+
240
+ ### Clashes
241
+ Points where reviewers disagreed. State both sides with a one-line
242
+ reviewer-label citation per side.
243
+
244
+ ### Blind spots
245
+ Items that none of the reviewers raised but that the artefact's
246
+ context suggests are load-bearing. Maximum three. Mark each as
247
+ `needs-verification` when the host agent inferred it rather than
248
+ read it directly from a response.
249
+
250
+ ### Recommendation
251
+ A single sentence: which course the host agent should advise the
252
+ user to take, grounded in the strongest converged point.
253
+
254
+ ### Next step
255
+ One concrete next action the user can take in their current turn.
256
+ """.strip()
257
+
258
+ PR_SYNTHESIS = """\
259
+ Summarise the council with the PR-review shape below.
260
+
261
+ ### Consensus
262
+ Findings where two or more reviewers agreed, each one a single line.
263
+
264
+ ### Conflicts
265
+ Findings where reviewers disagreed. State both sides with reviewer
266
+ labels; do not pick a winner here — that lives in the recommendation.
267
+
268
+ ### Must-fix before merge
269
+ Items at least one reviewer marked `REQUEST_CHANGES` or `REJECT`
270
+ and the host agent confirms are load-bearing. Maximum five.
271
+
272
+ ### Recommendation
273
+ APPROVE / REQUEST_CHANGES / REJECT and a single sentence justifying
274
+ the verdict, anchored on the strongest consensus or must-fix line.
275
+ """.strip()
276
+
277
+ ANALYSIS_SYNTHESIS = """\
278
+ Summarise the council with the analysis-lens shape below.
279
+
280
+ ### Top-10 by consensus
281
+ Findings ranked by how many reviewers surfaced them. Format each
282
+ line as: `N. <finding> — cited by <reviewer labels> · evidence:
283
+ confirmed | inferred | speculative · roadmap-ready: yes | needs-discovery`.
284
+ Stop at ten or when only single-reviewer items remain, whichever
285
+ comes first.
286
+
287
+ ### Supporting
288
+ Findings that one reviewer raised and at least one other treated as
289
+ plausible but did not independently surface. One line each, same
290
+ metadata shape as Top-10.
291
+
292
+ ### Outliers
293
+ Single-reviewer findings the others did not engage with. Keep them
294
+ — they are signal for a future deeper analysis pass — but mark each
295
+ as `unverified-by-council`.
296
+ """.strip()
297
+
298
+ # Creative lenses — open-ended prose, no template. The renderer keeps
299
+ # the bare "Convergence / Divergence" slot so the host agent can write
300
+ # free-form synthesis.
301
+ _CREATIVE_PASSTHROUGH = ""
302
+
303
+ _SYNTHESIS_TABLE = {
304
+ "default": DEFAULT_SYNTHESIS,
305
+ "pr": PR_SYNTHESIS,
306
+ "analysis": ANALYSIS_SYNTHESIS,
307
+ "design": _CREATIVE_PASSTHROUGH,
308
+ "optimize": _CREATIVE_PASSTHROUGH,
309
+ }
310
+
311
+ # Input modes inherit the `default` decision template. Lens overrides
312
+ # (`pr`/`design`/`optimize`/`analysis`) pick their own row.
313
+ _INPUT_MODE_TO_SYNTHESIS_KEY = {
314
+ "prompt": "default",
315
+ "roadmap": "default",
316
+ "diff": "default",
317
+ "files": "default",
134
318
  }
135
319
 
136
320
 
321
def synthesis_template(mode: str | None) -> str:
    """Look up the synthesis-prompt body that belongs to *mode*.

    Resolution order:

    1. ``None`` maps to the ``default`` decision template (kept for
       callers that do not pass a lens).
    2. A key of ``_SYNTHESIS_TABLE`` selects its own row.
    3. A key of ``_INPUT_MODE_TO_SYNTHESIS_KEY`` is translated to the
       table row it aliases.

    The creative lenses (``design`` / ``optimize``) are stored as the
    empty string, so an empty return value means "no template".

    Raises:
        ValueError: if *mode* is neither a table key nor an input-mode
            alias. Unknown modes are never passed through silently.
    """
    if mode is None:
        key = "default"
    elif mode in _SYNTHESIS_TABLE:
        key = mode
    elif mode in _INPUT_MODE_TO_SYNTHESIS_KEY:
        key = _INPUT_MODE_TO_SYNTHESIS_KEY[mode]
    else:
        known = sorted(set(_SYNTHESIS_TABLE) | set(_INPUT_MODE_TO_SYNTHESIS_KEY))
        raise ValueError(
            f"Unknown synthesis mode {mode!r}. "
            f"Expected one of: {known}"
        )
    return _SYNTHESIS_TABLE[key]
342
+
343
+
344
def all_synthesis_modes() -> list[str]:
    """List every key of ``_SYNTHESIS_TABLE`` in sorted order."""
    lens_keys = list(_SYNTHESIS_TABLE)
    lens_keys.sort()
    return lens_keys
347
+
348
+
137
349
  def _strip_host_identity(text: str) -> str:
138
350
  """Drop any *whole line* containing a host-agent identity substring.
139
351
 
@@ -230,3 +442,126 @@ def system_prompt_for(
230
442
 
231
443
  def all_modes() -> list[str]:
232
444
  return sorted(_MODE_TABLE)
445
+
446
+
447
def advisor_system_prompt(
    persona_text: str,
    *,
    project: ProjectContext | None = None,
    original_ask: str = "",
) -> str:
    """Assemble the system prompt for an advisor-mode call.

    The result is the output of ``handoff_preamble(project, original_ask)``,
    a blank line, then the stripped persona text. Nothing from
    ``_MODE_TABLE`` is added here: the persona body is the only
    instruction text that follows the preamble.

    Raises:
        ValueError: if *persona_text* is ``None``, empty, or only
            whitespace.
    """
    # The preamble is built before validation, matching the call order
    # callers have always observed.
    preamble = handoff_preamble(project, original_ask)
    persona = persona_text.strip() if persona_text else ""
    if persona == "":
        raise ValueError("advisor_system_prompt: persona_text is empty.")
    return f"{preamble}\n\n{persona}"
466
+
467
+
468
+
469
def build_extraction_user_prompt(original_analysis: str) -> str:
    """Render the user message for the finding-extraction pass.

    The message is ``FINDING_EXTRACTION_PROMPT``, a ``---`` separator,
    then the prior analysis after it has been passed through
    ``_strip_host_identity`` and stripped of surrounding whitespace.
    A falsy *original_analysis* is treated as the empty string.
    """
    analysis_text = original_analysis if original_analysis else ""
    body = _strip_host_identity(analysis_text).strip()
    return "\n\n".join((FINDING_EXTRACTION_PROMPT, "---", body))
477
+
478
+
479
def build_scoring_user_prompt(anonymised: dict[str, str]) -> str:
    """Render the user message for the scoring pass.

    *anonymised* maps a neutral label (``Finding-A``, ``Finding-B``, …)
    to the finding text. The mapping is rendered exactly as given, in
    its iteration order, one ``### <label>`` section per entry. No
    anonymisation happens here, so the caller must already have removed
    author identities.
    """
    sections = [f"### {label}\n\n{text}" for label, text in anonymised.items()]
    # The two empty strings around "---" are joined like any other part,
    # which widens the gap around the separator in the rendered prompt.
    return "\n\n".join([FINDING_SCORING_PROMPT, "", "---", "", *sections])
490
+
491
+
492
+ # ── Peer-review (Phase 5 / F1, Karpathy anonymous review) ────────────
493
+ #
494
+ # After the final deliberation round, each member sees the OTHER
495
+ # members' deliberation outputs under neutral `Response-A` / `Response-B`
496
+ # labels and produces a Karpathy-style critique: strongest response,
497
+ # weakest blind spot, what all of them missed. Provider identity is
498
+ # stripped (Iron Law of Neutrality § peer-review); advisor persona
499
+ # labels (Phase 6) are preserved by the caller via `anonymize_responses`.
500
+ #
501
+ # Reviewers never see their own response — that is by design (the
502
+ # orchestrator filters self before calling `build_peer_review_user_prompt`).
503
+
504
+ PEER_REVIEW_PROMPT = """\
505
+ Below are responses from other independent reviewers to the same
506
+ artefact you just reviewed. Each is labelled with a neutral identifier
507
+ (`Response-A`, `Response-B`, …). You do NOT know which model produced
508
+ which response. Critique them as a peer — your goal is to surface
509
+ signal the round-1 deliberation may have missed.
510
+
511
+ Respond in plain prose under exactly these four headings:
512
+
513
+ ### Strongest response
514
+ Name the single response whose argument or evidence is most
515
+ load-bearing. Cite the label. One paragraph.
516
+
517
+ ### Weakest blind spot
518
+ The single most important thing one specific response missed,
519
+ glossed over, or got wrong. Cite the label. One paragraph.
520
+
521
+ ### What everyone missed
522
+ A point none of the responses raised but that the artefact's context
523
+ suggests is load-bearing. One paragraph. Mark as `needs-verification`
524
+ when you inferred it rather than read it directly from the artefact.
525
+
526
+ ### Refinement
527
+ One sentence: which course the synthesizer should prefer in light of
528
+ the above, grounded in the strongest converged signal.
529
+
530
+ Rules:
531
+ - Cite labels exactly as given (`Response-A`, not `A` or `the first one`).
532
+ - Do not invent agreement or disagreement that is not visible in the
533
+ responses themselves.
534
+ - You may NOT see your own response in the list — that is by design.
535
+ """.strip()
536
+
537
+ PEER_REVIEW_SYNTHESIS_ADDENDUM = """\
538
+
539
+ ### Peer-Review-Surfaced Blind Spots
540
+ Items the peer-review round surfaced that the round-1 responses did
541
+ not. Cite the peer-reviewer label and the targeted response label
542
+ (`Reviewer A on Response-B: <one-line summary>`). Maximum three.
543
+ """.rstrip()
544
+
545
+
546
def build_peer_review_user_prompt(anonymised: dict[str, str]) -> str:
    """Render the user message for the peer-review pass.

    *anonymised* maps a neutral label (``Response-A``, ``Response-B``, …)
    to the response text. Entries are rendered in iteration order as
    ``### <label>`` sections beneath ``PEER_REVIEW_PROMPT``. This
    function only formats: provider identities must already have been
    stripped by the caller.
    """
    sections = [f"### {label}\n\n{text}" for label, text in anonymised.items()]
    # The empty strings around "---" are deliberate parts of the join and
    # produce extra blank lines around the separator.
    return "\n\n".join([PEER_REVIEW_PROMPT, "", "---", "", *sections])
558
+
559
+
560
def peer_review_synthesis_addendum() -> str:
    """Return the ``PEER_REVIEW_SYNTHESIS_ADDENDUM`` constant unchanged.

    The text is meant to be appended to a lens-specific synthesis
    template when a peer-review round ran.

    NOTE(review): this function returns the same string for every lens.
    Any special handling for creative-lens (prose) runs would have to
    live in the caller — confirm against the renderer.
    """
    addendum = PEER_REVIEW_SYNTHESIS_ADDENDUM
    return addendum
@@ -58,10 +58,15 @@ _LINK_RE = re.compile(r'\[[^\]]*\]\(([^)#\s]+)(?:#[^)]*)?\)')
58
58
  # Body-link prefixes whose resolution is intentionally out of scope.
59
59
  # Council Decision 2 (2026-05-06): P3.1 was cancelled, so guideline links
60
60
  # under `.agent-src/rules/` cannot resolve in the projected tree. Copilot
61
- # suppression (P6) is the silencer for the noise.
61
+ # suppression (P6) is the silencer for the noise. `docs/contracts/` shares
62
+ # the same shape as `docs/guidelines/` — both live at repo root and the
63
+ # rewriter collapses `../../docs/{contracts,guidelines}/...` to a
64
+ # `../docs/...` form that cannot resolve under `.agent-src/`.
62
65
  UNCHECKED_LINK_PREFIXES = (
63
66
  "../docs/guidelines/",
64
67
  "../../docs/guidelines/",
68
+ "../docs/contracts/",
69
+ "../../docs/contracts/",
65
70
  )
66
71
 
67
72
 
@@ -39,6 +39,17 @@ SKIP_DIRS = [
39
39
  "agents/council-questions", # design Q&A trail — forward-refs to planned artifacts
40
40
  "agents/analysis", # plate-comparison working docs — forward-refs to planned artifacts
41
41
  ]
42
+
43
+ # Per-file opt-out marker. When present in the first 10 lines of a .md
44
+ # file, the entire file is skipped. Use for working docs that
45
+ # intentionally reference planned-but-not-yet-existing artifacts
46
+ # (audit bundles, design Q&A, in-flight plans).
47
+ FILE_SKIP_MARKER = "<!-- check-refs: skip -->"
48
+
49
+ # Per-line opt-out marker. When present anywhere on a line, that line's
50
+ # refs are skipped. Use for isolated forward-refs inside otherwise
51
+ # fully-checked documents.
52
+ LINE_IGNORE_MARKER = "<!-- ref-ignore -->"
42
53
  ROOT = Path(".")
43
54
 
44
55
  # YAML memory files (engineering-memory layer) live under `agents/memory/`.
@@ -219,6 +230,14 @@ def check_file(filepath: Path, artifacts: dict[str, set[str]], root: Path) -> Li
219
230
  except Exception:
220
231
  return broken
221
232
 
233
+ # File-level opt-out: working docs that intentionally reference
234
+ # planned-but-not-yet-existing artifacts mark themselves with
235
+ # `<!-- check-refs: skip -->` in the first 10 lines. Marker pairs
236
+ # with the per-line `<!-- ref-ignore -->` below; either suffices.
237
+ header_lines = text.splitlines()[:10]
238
+ if any(FILE_SKIP_MARKER in line for line in header_lines):
239
+ return broken
240
+
222
241
  # Validate `personas:` frontmatter entries against known persona ids.
223
242
  for line_no, pid in _extract_personas_frontmatter(text):
224
243
  if pid not in artifacts["personas"]:
@@ -241,6 +260,12 @@ def check_file(filepath: Path, artifacts: dict[str, set[str]], root: Path) -> Li
241
260
  if in_code_block:
242
261
  continue
243
262
 
263
+ # Per-line opt-out: isolated forward-refs in otherwise checked
264
+ # documents (e.g. one ref to a planned skill, surrounded by
265
+ # valid refs). Skip the whole line's path / skill / rule checks.
266
+ if LINE_IGNORE_MARKER in line:
267
+ continue
268
+
244
269
  # Unchecked TODO checkboxes document future work — their refs are
245
270
  # forward-looking and will not resolve yet. Track multi-line bullets:
246
271
  # any `- [ ]` opens a TODO context; a new top-level bullet, heading,
@@ -0,0 +1,168 @@
1
+ #!/usr/bin/env python3
2
+ """CI-time / local-edit-time ratio (council file 07, Phase 2.3).
3
+
4
+ Samples the last N commits on a branch, classifies each by touched
5
+ paths (doc / skill / test / meta / mixed), and computes:
6
+
7
+ ratio = ci_time / local_time
8
+
9
+ where:
10
+ - `local_time` = delta between author-date of the *previous* commit and
11
+ author-date of the current commit, capped at 60 min to filter breaks.
12
+ - `ci_time` = longest (wall-clock max, not sum) GitHub Actions workflow duration for that commit
13
+ sha (via `gh run list --commit <sha>`).
14
+
15
+ Threshold rule (Round-3 Sonnet protocol):
16
+ - Median ratio > 5× for any frequent class → that class needs a cheaper tier
17
+ - Median ratio < 3× across all classes → structural overhead acceptable
18
+
19
+ Output: human-readable table on stdout + JSON to
20
+ `agents/reports/ci-time-ratio.json`.
21
+
22
+ Usage:
23
+ python3 scripts/ci_time_ratio.py --limit 30
24
+ python3 scripts/ci_time_ratio.py --branch main --limit 30 --out path.json
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import argparse
30
+ import json
31
+ import statistics
32
+ import subprocess
33
+ import sys
34
+ from collections import defaultdict
35
+ from pathlib import Path
36
+
37
+ REPO_ROOT = Path(__file__).resolve().parent.parent
38
+ DEFAULT_OUT = REPO_ROOT / "agents" / "reports" / "ci-time-ratio.json"
39
+
40
+ LOCAL_TIME_CAP_S = 60 * 60 # cap a single edit window at 60 min
41
+ THRESHOLD_FAIL = 5.0
42
+ THRESHOLD_PASS = 3.0
43
+
44
+
45
def run(cmd: list[str]) -> str:
    """Execute *cmd* with the repository root as working directory.

    Returns the command's standard output decoded as text. A non-zero
    exit status raises ``subprocess.CalledProcessError``.
    """
    completed = subprocess.run(
        cmd,
        cwd=REPO_ROOT,
        text=True,
        stdout=subprocess.PIPE,
        check=True,
    )
    return completed.stdout
47
+
48
+
49
def list_commits(branch: str, limit: int) -> list[dict]:
    """Return the newest ``limit + 1`` commits reachable from *branch*.

    One extra commit is requested so that every one of the first *limit*
    commits has a predecessor to diff timestamps against. Each entry is
    a dict with ``sha`` (full hash), ``timestamp`` (author date as epoch
    seconds) and ``subject``, in ``git log`` order (newest first).
    """
    log_cmd = ["git", "log", branch, f"-n{limit + 1}",
               "--format=%H\t%at\t%s"]
    # Split on the first two tabs only, so a subject containing tabs
    # stays intact.
    records = (entry.split("\t", 2) for entry in run(log_cmd).strip().splitlines())
    return [
        {"sha": sha, "timestamp": int(epoch), "subject": subject}
        for sha, epoch, subject in records
    ]
57
+
58
+
59
def classify(sha: str) -> str:
    """Classify a commit by the kind of files it touches.

    The file list comes from ``git show --name-only`` for *sha*.

    Returns:
        ``"empty"`` when the commit touches no files; otherwise the
        first of ``"skill"``, ``"test"``, ``"doc"``, ``"meta"`` whose
        bucket holds at least 70% of the touched files; otherwise
        ``"mixed"``. A file may count towards several buckets (for
        example a Markdown file under ``/skills/``), so the priority
        order decides ties.
    """
    listing = run(["git", "show", "--name-only", "--format=", sha])
    files = [f for f in listing.strip().splitlines() if f]
    if not files:
        return "empty"

    total = len(files)
    counts = {
        "skill": sum(
            1 for f in files
            if "/skills/" in f or f.startswith(".agent-src.uncompressed/skills/")
        ),
        "test": sum(1 for f in files if f.startswith("tests/") or "/tests/" in f),
        "doc": sum(1 for f in files if f.startswith("docs/") or f.endswith(".md")),
        "meta": sum(
            1 for f in files
            if f.startswith(("Taskfile", "scripts/", ".github/", "pyproject", "package"))
        ),
    }
    # Single-class dominance: at least 70% of touched files in one bucket.
    # Compared in integer arithmetic so the threshold is exact; truncating
    # `total * 0.7` would let 1 of 2 or 2 of 3 files count as dominant.
    for label, n in counts.items():
        if n * 10 >= total * 7:
            return label
    return "mixed"
74
+
75
+
76
+ def ci_duration_for(sha: str) -> int | None:
77
+ """Total wall-clock seconds for all completed runs of this commit."""
78
+ try:
79
+ out = run(["gh", "run", "list", "--commit", sha, "--limit", "20",
80
+ "--json", "createdAt,updatedAt,status,conclusion"])
81
+ except subprocess.CalledProcessError:
82
+ return None
83
+ runs = json.loads(out)
84
+ if not runs:
85
+ return None
86
+ durations = []
87
+ for r in runs:
88
+ if r.get("status") != "completed":
89
+ continue
90
+ from datetime import datetime
91
+ c = datetime.fromisoformat(r["createdAt"].replace("Z", "+00:00"))
92
+ u = datetime.fromisoformat(r["updatedAt"].replace("Z", "+00:00"))
93
+ durations.append((u - c).total_seconds())
94
+ if not durations:
95
+ return None
96
+ # Workflows run in parallel — wall-clock is the max, not the sum.
97
+ return int(max(durations))
98
+
99
+
100
def collect(branch: str, limit: int) -> list[dict]:
    """Build one sample row per usable commit on *branch*.

    Each commit is paired with its predecessor in ``git log`` order.
    The local edit time is the author-date gap between the two, capped
    at ``LOCAL_TIME_CAP_S``. A commit is skipped when that gap is under
    30 seconds or when ``ci_duration_for`` has no data for it.

    Returns:
        A list of dicts with ``sha`` (first 10 characters), ``class``,
        ``local_s``, ``ci_s``, ``ratio`` (``ci_s / local_s`` rounded to
        two decimals) and ``subject`` (first 80 characters). The list is
        empty when fewer than two commits are available.
    """
    commits = list_commits(branch, limit)
    if len(commits) < 2:
        return []

    samples = []
    for current, parent in zip(commits, commits[1:]):
        elapsed = min(current["timestamp"] - parent["timestamp"], LOCAL_TIME_CAP_S)
        if elapsed < 30:
            continue
        ci_seconds = ci_duration_for(current["sha"])
        if ci_seconds is None:
            continue
        commit_class = classify(current["sha"])
        samples.append({
            "sha": current["sha"][:10],
            "class": commit_class,
            "local_s": elapsed,
            "ci_s": ci_seconds,
            # `elapsed` is at least 30 here, so the division is always defined.
            "ratio": round(ci_seconds / elapsed, 2),
            "subject": current["subject"][:80],
        })
    return samples
121
+
122
+
123
def summarise(rows: list[dict]) -> dict:
    """Aggregate sample rows into per-class and overall median ratios.

    Rows whose ``ratio`` is ``None`` are ignored.

    Returns:
        ``{"overall": ..., "by_class": ...}``. ``by_class`` maps each
        class name (sorted) to ``n``, ``median``, ``min``, ``max`` and a
        ``verdict``: ``"optimise"`` when the median exceeds
        ``THRESHOLD_FAIL``, ``"acceptable"`` when it is below
        ``THRESHOLD_PASS``, otherwise ``"watch"``. ``overall`` holds
        ``n``, ``median`` and a verdict of ``"acceptable"``,
        ``"needs-review"`` or, with no usable rows, ``"no-data"``.
    """
    ratios_by_class: dict[str, list[float]] = defaultdict(list)
    pooled = []
    for row in rows:
        ratio = row["ratio"]
        if ratio is None:
            continue
        ratios_by_class[row["class"]].append(ratio)
        pooled.append(ratio)

    by_class = {}
    for cls in sorted(ratios_by_class):
        ratios = ratios_by_class[cls]
        mid = statistics.median(ratios)
        verdict = (
            "optimise" if mid > THRESHOLD_FAIL
            else "acceptable" if mid < THRESHOLD_PASS
            else "watch"
        )
        by_class[cls] = {
            "n": len(ratios),
            "median": round(mid, 2),
            "min": round(min(ratios), 2),
            "max": round(max(ratios), 2),
            "verdict": verdict,
        }

    if pooled:
        overall_mid = statistics.median(pooled)
        overall = {
            "n": len(pooled),
            "median": round(overall_mid, 2),
            "verdict": "acceptable" if overall_mid < THRESHOLD_PASS else "needs-review",
        }
    else:
        overall = {"n": 0, "median": None, "verdict": "no-data"}

    return {"overall": overall, "by_class": by_class}
146
+
147
+
148
def main() -> int:
    """Command-line entry point: collect samples, write the JSON report, print a summary.

    Options: ``--branch`` (default ``HEAD``), ``--limit`` (default 30)
    and ``--out`` (default ``DEFAULT_OUT``). The report is the output of
    ``summarise`` plus the raw rows under ``"sample"``. The parent
    directory of ``--out`` is created when missing.

    Returns:
        ``0`` as the process exit status.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--branch", default="HEAD")
    p.add_argument("--limit", type=int, default=30)
    p.add_argument("--out", type=Path, default=DEFAULT_OUT)
    args = p.parse_args()

    rows = collect(args.branch, args.limit)
    report = summarise(rows)
    report["sample"] = rows
    args.out.parent.mkdir(parents=True, exist_ok=True)
    args.out.write_text(json.dumps(report, indent=2) + "\n")

    # `relative_to` raises ValueError for any path outside REPO_ROOT,
    # including a relative `--out path.json`. Fall back to the path as
    # given instead of crashing after the report has been written.
    try:
        shown = args.out.relative_to(REPO_ROOT)
    except ValueError:
        shown = args.out
    print(f"✅ Wrote {shown} (n={report['overall']['n']})")

    ov = report["overall"]
    # With no usable samples the median is None; avoid printing "None×".
    median_label = "n/a" if ov["median"] is None else f"{ov['median']}×"
    print(f" overall median ratio: {median_label} → {ov['verdict']}")
    for cls, s in report["by_class"].items():
        print(f" {cls:7} n={s['n']:2} median={s['median']:.2f}× range=[{s['min']}–{s['max']}] {s['verdict']}")
    return 0
165
+
166
+
167
+ if __name__ == "__main__":
168
+ sys.exit(main())