@event4u/agent-config 2.11.0 → 2.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-src/commands/council/analysis.md +142 -0
- package/.agent-src/commands/council/debate.md +129 -0
- package/.agent-src/commands/council/default.md +8 -0
- package/.agent-src/commands/council/design.md +16 -12
- package/.agent-src/commands/council/optimize.md +16 -15
- package/.agent-src/commands/council/pr.md +12 -12
- package/.agent-src/commands/council.md +48 -2
- package/.agent-src/personas/advisors/contrarian.md +95 -0
- package/.agent-src/personas/advisors/executor.md +99 -0
- package/.agent-src/personas/advisors/expansionist.md +98 -0
- package/.agent-src/personas/advisors/first-principles.md +98 -0
- package/.agent-src/personas/advisors/outsider.md +102 -0
- package/.agent-src/rules/copilot-routing.md +19 -0
- package/.agent-src/rules/devcontainer-routing.md +20 -0
- package/.agent-src/rules/laravel-routing.md +20 -0
- package/.agent-src/rules/symfony-routing.md +20 -0
- package/.agent-src/skills/ai-council/SKILL.md +180 -2
- package/.agent-src/skills/canvas-design/SKILL.md +132 -0
- package/.agent-src/skills/canvas-design/evals/triggers.json +16 -0
- package/.agent-src/skills/copilot-config/SKILL.md +1 -1
- package/.agent-src/skills/devcontainer/SKILL.md +1 -1
- package/.agent-src/skills/doc-coauthoring/SKILL.md +129 -0
- package/.agent-src/skills/doc-coauthoring/evals/triggers.json +16 -0
- package/.agent-src/skills/laravel/SKILL.md +1 -1
- package/.agent-src/skills/project-analysis-core/SKILL.md +1 -1
- package/.agent-src/skills/project-analyzer/SKILL.md +1 -1
- package/.agent-src/skills/skill-writing/SKILL.md +101 -16
- package/.agent-src/skills/sql-writing/SKILL.md +1 -1
- package/.agent-src/skills/symfony-workflow/SKILL.md +1 -1
- package/.agent-src/skills/universal-project-analysis/SKILL.md +1 -1
- package/.agent-src/templates/agents/agent-project-settings.example.yml +1 -1
- package/.claude-plugin/marketplace.json +5 -1
- package/AGENTS.md +1 -1
- package/CHANGELOG.md +78 -0
- package/CONTRIBUTING.md +5 -0
- package/README.md +3 -3
- package/config/agent-settings.template.yml +5 -84
- package/docs/architecture/multi-tool-projection.md +53 -0
- package/docs/architecture/{compression.md → source-projection.md} +21 -3
- package/docs/architecture.md +6 -6
- package/docs/catalog.md +21 -11
- package/docs/contracts/adr-architectural-consensus-mechanism.md +67 -0
- package/docs/contracts/adr-level-6-productization.md +2 -2
- package/docs/contracts/ai-council-config.md +186 -0
- package/docs/contracts/command-clusters.md +57 -1
- package/docs/contracts/multi-tool-projection-fidelity.md +109 -0
- package/docs/getting-started.md +2 -2
- package/package.json +1 -1
- package/scripts/_archive/README.md +59 -0
- package/scripts/ai_council/_default_prices.py +10 -1
- package/scripts/ai_council/advisors.py +148 -0
- package/scripts/ai_council/clients.py +189 -4
- package/scripts/ai_council/config.py +368 -0
- package/scripts/ai_council/consensus.py +290 -0
- package/scripts/ai_council/orchestrator.py +634 -16
- package/scripts/ai_council/prompts.py +335 -0
- package/scripts/check_compressed_paths.py +6 -1
- package/scripts/check_references.py +25 -0
- package/scripts/ci_time_ratio.py +168 -0
- package/scripts/council_cli.py +1007 -32
- package/scripts/measure_projection_bytes.py +159 -0
- package/scripts/measure_roadmap_trajectory.py +112 -0
- package/scripts/probe_projection_fidelity.py +202 -0
- package/scripts/run_skill_evals.py +185 -0
- package/scripts/schemas/skill.schema.json +4 -0
- package/scripts/score_skill_selection.py +198 -0
- package/scripts/skill_collision_clusters.py +162 -0
- package/scripts/skill_linter.py +71 -1
- /package/scripts/{_backfill_skill_domains.py → _archive/_backfill_skill_domains.py} +0 -0
- /package/scripts/{_bootstrap_tier_frontmatter.py → _archive/_bootstrap_tier_frontmatter.py} +0 -0
- /package/scripts/{_p43_bodies.py → _archive/_p43_bodies.py} +0 -0
- /package/scripts/{_p43_compress.py → _archive/_p43_compress.py} +0 -0
- /package/scripts/{_p4_migrate.py → _archive/_p4_migrate.py} +0 -0
- /package/scripts/{_phase2_shim_helper.py → _archive/_phase2_shim_helper.py} +0 -0
- /package/scripts/{_pilot_council_question.py → _archive/_pilot_council_question.py} +0 -0
|
@@ -122,6 +122,42 @@ MUST:
|
|
|
122
122
|
evidence in the artefact).
|
|
123
123
|
""".strip()
|
|
124
124
|
|
|
125
|
+
ANALYSIS_MODE = """\
|
|
126
|
+
The artefact is a local analysis output (from a project analyzer,
|
|
127
|
+
audit script, or codebase scan). Critique the **analysis itself**, not
|
|
128
|
+
the underlying codebase. You MUST:
|
|
129
|
+
1. Flag findings that are restated under different headings —
|
|
130
|
+
deduplicate aggressively. The downstream consumer wants a unique
|
|
131
|
+
Top-N, not a long list with overlap.
|
|
132
|
+
2. Score the evidence quality of each finding: confirmed (the
|
|
133
|
+
analysis cites file:line / metric), inferred (plausible from
|
|
134
|
+
stated context), or speculative (no citation, vibes-only).
|
|
135
|
+
Speculative findings must be called out by name.
|
|
136
|
+
3. Identify findings that are roadmap-ready (concrete enough to land
|
|
137
|
+
as a phase step) vs ones that need a discovery loop first.
|
|
138
|
+
4. Propose 3–5 follow-up actions ranked by leverage — what the next
|
|
139
|
+
roadmap should attack first. Cite the supporting finding(s) by id
|
|
140
|
+
or heading.
|
|
141
|
+
End with: a Top-N consensus list (one bullet per finding the
|
|
142
|
+
analysis surfaces) plus a single sentence on the strongest blind
|
|
143
|
+
spot the analysis itself has.
|
|
144
|
+
""".strip()
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
DEBATE_MODE = """\
|
|
148
|
+
The artefact is the topic of a structured multi-round debate. You are
|
|
149
|
+
one of several independent reviewers. Round-specific instructions:
|
|
150
|
+
1. Round 1 — state your strongest, most defensible position on the
|
|
151
|
+
topic. Argue from evidence and first principles. Do not hedge.
|
|
152
|
+
2. Round 2+ — read the anonymised positions from the previous round.
|
|
153
|
+
Identify the SINGLE strongest opposing position and write a
|
|
154
|
+
rebuttal addressed at its strongest steel-manned form. Your task
|
|
155
|
+
is to find the load-bearing flaw the opposing reviewer missed —
|
|
156
|
+
do NOT search for common ground.
|
|
157
|
+
End each round with: a one-line position summary and the single
|
|
158
|
+
piece of evidence that would change your mind.
|
|
159
|
+
""".strip()
|
|
160
|
+
|
|
125
161
|
|
|
126
162
|
_MODE_TABLE = {
|
|
127
163
|
"prompt": PROMPT_MODE,
|
|
@@ -131,9 +167,185 @@ _MODE_TABLE = {
|
|
|
131
167
|
"pr": PR_MODE,
|
|
132
168
|
"design": DESIGN_MODE,
|
|
133
169
|
"optimize": OPTIMIZE_MODE,
|
|
170
|
+
"analysis": ANALYSIS_MODE,
|
|
171
|
+
"debate": DEBATE_MODE,
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
# ── Consensus-scoring prompts (Phase 4 / F3) ──────────────────────────
|
|
176
|
+
#
|
|
177
|
+
# Two-step extraction + scoring round used by the analysis lens. The
|
|
178
|
+
# extraction pass asks each member to surface its own top findings in
|
|
179
|
+
# a strict JSON shape; the scoring pass asks each member to rate
|
|
180
|
+
# anonymised findings produced by the *other* members.
|
|
181
|
+
#
|
|
182
|
+
# Iron Law of Neutrality applies to both: the extraction prompt never
|
|
183
|
+
# names other reviewers, and the scoring prompt strips the source
|
|
184
|
+
# author by using `Finding-A` / `Finding-B` labels (see
|
|
185
|
+
# `consensus.anonymize_findings`).
|
|
186
|
+
|
|
187
|
+
FINDING_EXTRACTION_PROMPT = """\
|
|
188
|
+
You have just produced an analysis. Re-emit your top findings as a
|
|
189
|
+
strict JSON array suitable for downstream tooling. Each item MUST
|
|
190
|
+
have:
|
|
191
|
+
|
|
192
|
+
{"id": "<short-slug>", "text": "<one-sentence finding>"}
|
|
193
|
+
|
|
194
|
+
Rules:
|
|
195
|
+
- 3-7 findings, ordered by importance (most important first).
|
|
196
|
+
- `id` is a 1-3 word kebab-case slug, unique within your array.
|
|
197
|
+
- `text` is a single sentence, no markdown, no reviewer self-reference.
|
|
198
|
+
- Wrap the array in a ```json``` fenced block. No commentary outside it.
|
|
199
|
+
""".strip()
|
|
200
|
+
|
|
201
|
+
FINDING_SCORING_PROMPT = """\
|
|
202
|
+
Below are findings from other independent reviewers, presented with
|
|
203
|
+
neutral labels (Finding-A, Finding-B, …). Score each one on its
|
|
204
|
+
merits. You MUST emit a strict JSON array, one entry per finding,
|
|
205
|
+
in this shape:
|
|
206
|
+
|
|
207
|
+
{"finding_id": "Finding-A", "score": 1-10, "agree": true|false,
|
|
208
|
+
"reason": "<one-sentence justification>"}
|
|
209
|
+
|
|
210
|
+
Rules:
|
|
211
|
+
- `score` is an integer 1 (weak / irrelevant) to 10 (load-bearing /
|
|
212
|
+
must-address).
|
|
213
|
+
- `agree=true` means you would surface this same finding yourself;
|
|
214
|
+
`agree=false` means you think it is wrong, overstated, or off-topic.
|
|
215
|
+
- `reason` is a single sentence, no markdown.
|
|
216
|
+
- Wrap the array in a ```json``` fenced block. No commentary outside it.
|
|
217
|
+
|
|
218
|
+
You may not see your own findings in the list — that is by design.
|
|
219
|
+
""".strip()
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
# ── Synthesis templates (Phase 3 / F2) ────────────────────────────────
|
|
223
|
+
#
|
|
224
|
+
# Lens-aware synthesis prompts. Each entry maps a lens key onto the
|
|
225
|
+
# block the host agent should produce when summarising member responses.
|
|
226
|
+
# R4 Q4 split: decision lenses get a Karpathy-structured template;
|
|
227
|
+
# creative lenses (design / optimize) stay open-ended prose (empty
|
|
228
|
+
# string → renderer falls back to the bare "Convergence / Divergence"
|
|
229
|
+
# slot). Input modes (prompt / roadmap / diff / files) map onto the
|
|
230
|
+
# `default` decision template via `synthesis_template()`.
|
|
231
|
+
|
|
232
|
+
DEFAULT_SYNTHESIS = """\
|
|
233
|
+
Summarise the council using the structured shape below. Be terse,
|
|
234
|
+
cite reviewers by label, and refuse to invent agreement that is not
|
|
235
|
+
in the responses.
|
|
236
|
+
|
|
237
|
+
### Agreement
|
|
238
|
+
Points that two or more reviewers converged on, each as a single line.
|
|
239
|
+
|
|
240
|
+
### Clashes
|
|
241
|
+
Points where reviewers disagreed. State both sides with a one-line
|
|
242
|
+
reviewer-label citation per side.
|
|
243
|
+
|
|
244
|
+
### Blind spots
|
|
245
|
+
Items that none of the reviewers raised but that the artefact's
|
|
246
|
+
context suggests are load-bearing. Maximum three. Mark each as
|
|
247
|
+
`needs-verification` when the host agent inferred it rather than
|
|
248
|
+
read it directly from a response.
|
|
249
|
+
|
|
250
|
+
### Recommendation
|
|
251
|
+
A single sentence: which course the host agent should advise the
|
|
252
|
+
user to take, grounded in the strongest converged point.
|
|
253
|
+
|
|
254
|
+
### Next step
|
|
255
|
+
One concrete next action the user can take in their current turn.
|
|
256
|
+
""".strip()
|
|
257
|
+
|
|
258
|
+
PR_SYNTHESIS = """\
|
|
259
|
+
Summarise the council with the PR-review shape below.
|
|
260
|
+
|
|
261
|
+
### Consensus
|
|
262
|
+
Findings where two or more reviewers agreed, each one a single line.
|
|
263
|
+
|
|
264
|
+
### Conflicts
|
|
265
|
+
Findings where reviewers disagreed. State both sides with reviewer
|
|
266
|
+
labels; do not pick a winner here — that lives in the recommendation.
|
|
267
|
+
|
|
268
|
+
### Must-fix before merge
|
|
269
|
+
Items at least one reviewer marked `REQUEST_CHANGES` or `REJECT`
|
|
270
|
+
and the host agent confirms are load-bearing. Maximum five.
|
|
271
|
+
|
|
272
|
+
### Recommendation
|
|
273
|
+
APPROVE / REQUEST_CHANGES / REJECT and a single sentence justifying
|
|
274
|
+
the verdict, anchored on the strongest consensus or must-fix line.
|
|
275
|
+
""".strip()
|
|
276
|
+
|
|
277
|
+
ANALYSIS_SYNTHESIS = """\
|
|
278
|
+
Summarise the council with the analysis-lens shape below.
|
|
279
|
+
|
|
280
|
+
### Top-10 by consensus
|
|
281
|
+
Findings ranked by how many reviewers surfaced them. Format each
|
|
282
|
+
line as: `N. <finding> — cited by <reviewer labels> · evidence:
|
|
283
|
+
confirmed | inferred | speculative · roadmap-ready: yes | needs-discovery`.
|
|
284
|
+
Stop at ten or when only single-reviewer items remain, whichever
|
|
285
|
+
comes first.
|
|
286
|
+
|
|
287
|
+
### Supporting
|
|
288
|
+
Findings that one reviewer raised and at least one other treated as
|
|
289
|
+
plausible but did not independently surface. One line each, same
|
|
290
|
+
metadata shape as Top-10.
|
|
291
|
+
|
|
292
|
+
### Outliers
|
|
293
|
+
Single-reviewer findings the others did not engage with. Keep them
|
|
294
|
+
— they are signal for a future deeper analysis pass — but mark each
|
|
295
|
+
as `unverified-by-council`.
|
|
296
|
+
""".strip()
|
|
297
|
+
|
|
298
|
+
# Creative lenses — open-ended prose, no template. The renderer keeps
|
|
299
|
+
# the bare "Convergence / Divergence" slot so the host agent can write
|
|
300
|
+
# free-form synthesis.
|
|
301
|
+
_CREATIVE_PASSTHROUGH = ""
|
|
302
|
+
|
|
303
|
+
_SYNTHESIS_TABLE = {
|
|
304
|
+
"default": DEFAULT_SYNTHESIS,
|
|
305
|
+
"pr": PR_SYNTHESIS,
|
|
306
|
+
"analysis": ANALYSIS_SYNTHESIS,
|
|
307
|
+
"design": _CREATIVE_PASSTHROUGH,
|
|
308
|
+
"optimize": _CREATIVE_PASSTHROUGH,
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
# Input modes inherit the `default` decision template. Lens overrides
|
|
312
|
+
# (`pr`/`design`/`optimize`/`analysis`) pick their own row.
|
|
313
|
+
_INPUT_MODE_TO_SYNTHESIS_KEY = {
|
|
314
|
+
"prompt": "default",
|
|
315
|
+
"roadmap": "default",
|
|
316
|
+
"diff": "default",
|
|
317
|
+
"files": "default",
|
|
134
318
|
}
|
|
135
319
|
|
|
136
320
|
|
|
321
|
+
def synthesis_template(mode: str | None) -> str:
    """Look up the synthesis-prompt body that belongs to ``mode``.

    Resolution order:

    * ``None`` maps to the ``default`` decision template, so callers
      that never thread a lens through keep working.
    * A key of ``_SYNTHESIS_TABLE`` returns its own row. That row may be
      an empty string, which this module uses for lenses without a
      template.
    * A key of ``_INPUT_MODE_TO_SYNTHESIS_KEY`` is translated to the
      table key it points at.

    Raises:
        ValueError: ``mode`` is in neither table. The lookup fails
            closed instead of silently handing back an empty template.
    """
    if mode is None:
        return _SYNTHESIS_TABLE["default"]

    # Lens keys win over input-mode aliases when a name is in both maps.
    if mode in _SYNTHESIS_TABLE:
        table_key = mode
    else:
        table_key = _INPUT_MODE_TO_SYNTHESIS_KEY.get(mode)

    if table_key is None:
        known = sorted(set(_SYNTHESIS_TABLE) | set(_INPUT_MODE_TO_SYNTHESIS_KEY))
        raise ValueError(
            f"Unknown synthesis mode {mode!r}. "
            f"Expected one of: {known}"
        )
    return _SYNTHESIS_TABLE[table_key]
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def all_synthesis_modes() -> list[str]:
    """List, in sorted order, every key that has a row in ``_SYNTHESIS_TABLE``."""
    lens_keys = list(_SYNTHESIS_TABLE)
    lens_keys.sort()
    return lens_keys
|
|
347
|
+
|
|
348
|
+
|
|
137
349
|
def _strip_host_identity(text: str) -> str:
|
|
138
350
|
"""Drop any *whole line* containing a host-agent identity substring.
|
|
139
351
|
|
|
@@ -230,3 +442,126 @@ def system_prompt_for(
|
|
|
230
442
|
|
|
231
443
|
def all_modes() -> list[str]:
|
|
232
444
|
return sorted(_MODE_TABLE)
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
def advisor_system_prompt(
    persona_text: str,
    *,
    project: ProjectContext | None = None,
    original_ask: str = "",
) -> str:
    """Assemble the system prompt used for an advisor-mode call (Phase 6).

    The result is the handoff preamble built from ``project`` and
    ``original_ask``, a blank line, then the whitespace-trimmed persona
    body. No ``_MODE_TABLE`` addendum is appended here: the persona text
    is the only instructional content after the preamble.

    Raises:
        ValueError: ``persona_text`` is ``None``, empty, or only
            whitespace.
    """
    # The preamble is built before validation, matching the established
    # call order of this function.
    preamble = handoff_preamble(project, original_ask)
    persona = persona_text.strip() if persona_text else ""
    if persona == "":
        raise ValueError("advisor_system_prompt: persona_text is empty.")
    return f"{preamble}\n\n{persona}"
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def build_extraction_user_prompt(original_analysis: str) -> str:
    """Render the user message for the finding-extraction pass.

    The extraction rules come first, then a ``---`` separator, then the
    member's earlier analysis after it has been passed through
    ``_strip_host_identity`` and trimmed. A ``None`` or empty analysis is
    treated as an empty string.
    """
    source_text = original_analysis if original_analysis else ""
    scrubbed = _strip_host_identity(source_text).strip()
    return f"{FINDING_EXTRACTION_PROMPT}\n\n---\n\n{scrubbed}"
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
def build_scoring_user_prompt(anonymised: dict[str, str]) -> str:
    """Render the user message for the finding-scoring pass.

    ``anonymised`` maps a neutral label (``Finding-A``, ``Finding-B``, …)
    to the finding text. Each entry becomes a ``### <label>`` section in
    the mapping's iteration order. The mapping is rendered verbatim; no
    anonymisation happens here, so authors must already be stripped by
    the caller.
    """
    sections = [f"### {label}\n\n{text}" for label, text in anonymised.items()]
    # The empty strings are joined with the same "\n\n" separator as
    # everything else, so the "---" rule ends up padded by four newlines
    # on each side. Kept as-is so the rendered prompt stays byte-stable.
    return "\n\n".join([FINDING_SCORING_PROMPT, "", "---", "", *sections])
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
# ── Peer-review (Phase 5 / F1, Karpathy anonymous review) ────────────
|
|
493
|
+
#
|
|
494
|
+
# After the final deliberation round, each member sees the OTHER
|
|
495
|
+
# members' deliberation outputs under neutral `Response-A` / `Response-B`
|
|
496
|
+
# labels and produces a Karpathy-style critique: strongest response,
|
|
497
|
+
# weakest blind spot, what all of them missed. Provider identity is
|
|
498
|
+
# stripped (Iron Law of Neutrality § peer-review); advisor persona
|
|
499
|
+
# labels (Phase 6) are preserved by the caller via `anonymize_responses`.
|
|
500
|
+
#
|
|
501
|
+
# Reviewers never see their own response — that is by design (the
|
|
502
|
+
# orchestrator filters self before calling `build_peer_review_user_prompt`).
|
|
503
|
+
|
|
504
|
+
PEER_REVIEW_PROMPT = """\
|
|
505
|
+
Below are responses from other independent reviewers to the same
|
|
506
|
+
artefact you just reviewed. Each is labelled with a neutral identifier
|
|
507
|
+
(`Response-A`, `Response-B`, …). You do NOT know which model produced
|
|
508
|
+
which response. Critique them as a peer — your goal is to surface
|
|
509
|
+
signal the round-1 deliberation may have missed.
|
|
510
|
+
|
|
511
|
+
Respond in plain prose under exactly these four headings:
|
|
512
|
+
|
|
513
|
+
### Strongest response
|
|
514
|
+
Name the single response whose argument or evidence is most
|
|
515
|
+
load-bearing. Cite the label. One paragraph.
|
|
516
|
+
|
|
517
|
+
### Weakest blind spot
|
|
518
|
+
The single most important thing one specific response missed,
|
|
519
|
+
glossed over, or got wrong. Cite the label. One paragraph.
|
|
520
|
+
|
|
521
|
+
### What everyone missed
|
|
522
|
+
A point none of the responses raised but that the artefact's context
|
|
523
|
+
suggests is load-bearing. One paragraph. Mark as `needs-verification`
|
|
524
|
+
when you inferred it rather than read it directly from the artefact.
|
|
525
|
+
|
|
526
|
+
### Refinement
|
|
527
|
+
One sentence: which course the synthesizer should prefer in light of
|
|
528
|
+
the above, grounded in the strongest converged signal.
|
|
529
|
+
|
|
530
|
+
Rules:
|
|
531
|
+
- Cite labels exactly as given (`Response-A`, not `A` or `the first one`).
|
|
532
|
+
- Do not invent agreement or disagreement that is not visible in the
|
|
533
|
+
responses themselves.
|
|
534
|
+
- You may NOT see your own response in the list — that is by design.
|
|
535
|
+
""".strip()
|
|
536
|
+
|
|
537
|
+
PEER_REVIEW_SYNTHESIS_ADDENDUM = """\
|
|
538
|
+
|
|
539
|
+
### Peer-Review-Surfaced Blind Spots
|
|
540
|
+
Items the peer-review round surfaced that the round-1 responses did
|
|
541
|
+
not. Cite the peer-reviewer label and the targeted response label
|
|
542
|
+
(`Reviewer A on Response-B: <one-line summary>`). Maximum three.
|
|
543
|
+
""".rstrip()
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
def build_peer_review_user_prompt(anonymised: dict[str, str]) -> str:
    """Render the user message for the peer-review pass.

    ``anonymised`` maps a neutral label (``Response-A``, ``Response-B``,
    …) to the response text. Each entry becomes a ``### <label>`` section
    in the mapping's iteration order. The mapping is rendered verbatim;
    provider identities must already be stripped by the caller.
    """
    sections = [f"### {label}\n\n{text}" for label, text in anonymised.items()]
    # Joining the empty strings with "\n\n" pads the "---" rule with four
    # newlines on each side; kept so the rendered prompt stays byte-stable.
    return "\n\n".join([PEER_REVIEW_PROMPT, "", "---", "", *sections])
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
def peer_review_synthesis_addendum() -> str:
    """Return the synthesis addendum for runs where peer review fired.

    The value is always ``PEER_REVIEW_SYNTHESIS_ADDENDUM``; this function
    does not vary it by lens.

    NOTE(review): creative-lens (prose) runs are meant to receive only
    the bare section header. If so, that trimming happens in the
    renderer, not here. Confirm against the caller.
    """
    addendum = PEER_REVIEW_SYNTHESIS_ADDENDUM
    return addendum
|
|
@@ -58,10 +58,15 @@ _LINK_RE = re.compile(r'\[[^\]]*\]\(([^)#\s]+)(?:#[^)]*)?\)')
|
|
|
58
58
|
# Body-link prefixes whose resolution is intentionally out of scope.
|
|
59
59
|
# Council Decision 2 (2026-05-06): P3.1 was cancelled, so guideline links
|
|
60
60
|
# under `.agent-src/rules/` cannot resolve in the projected tree. Copilot
|
|
61
|
-
# suppression (P6) is the silencer for the noise.
|
|
61
|
+
# suppression (P6) is the silencer for the noise. `docs/contracts/` shares
|
|
62
|
+
# the same shape as `docs/guidelines/` — both live at repo root and the
|
|
63
|
+
# rewriter collapses `../../docs/{contracts,guidelines}/...` to a
|
|
64
|
+
# `../docs/...` form that cannot resolve under `.agent-src/`.
|
|
62
65
|
UNCHECKED_LINK_PREFIXES = (
|
|
63
66
|
"../docs/guidelines/",
|
|
64
67
|
"../../docs/guidelines/",
|
|
68
|
+
"../docs/contracts/",
|
|
69
|
+
"../../docs/contracts/",
|
|
65
70
|
)
|
|
66
71
|
|
|
67
72
|
|
|
@@ -39,6 +39,17 @@ SKIP_DIRS = [
|
|
|
39
39
|
"agents/council-questions", # design Q&A trail — forward-refs to planned artifacts
|
|
40
40
|
"agents/analysis", # plate-comparison working docs — forward-refs to planned artifacts
|
|
41
41
|
]
|
|
42
|
+
|
|
43
|
+
# Per-file opt-out marker. When present in the first 10 lines of a .md
|
|
44
|
+
# file, the entire file is skipped. Use for working docs that
|
|
45
|
+
# intentionally reference planned-but-not-yet-existing artifacts
|
|
46
|
+
# (audit bundles, design Q&A, in-flight plans).
|
|
47
|
+
FILE_SKIP_MARKER = "<!-- check-refs: skip -->"
|
|
48
|
+
|
|
49
|
+
# Per-line opt-out marker. When present anywhere on a line, that line's
|
|
50
|
+
# refs are skipped. Use for isolated forward-refs inside otherwise
|
|
51
|
+
# fully-checked documents.
|
|
52
|
+
LINE_IGNORE_MARKER = "<!-- ref-ignore -->"
|
|
42
53
|
ROOT = Path(".")
|
|
43
54
|
|
|
44
55
|
# YAML memory files (engineering-memory layer) live under `agents/memory/`.
|
|
@@ -219,6 +230,14 @@ def check_file(filepath: Path, artifacts: dict[str, set[str]], root: Path) -> Li
|
|
|
219
230
|
except Exception:
|
|
220
231
|
return broken
|
|
221
232
|
|
|
233
|
+
# File-level opt-out: working docs that intentionally reference
|
|
234
|
+
# planned-but-not-yet-existing artifacts mark themselves with
|
|
235
|
+
# `<!-- check-refs: skip -->` in the first 10 lines. Marker pairs
|
|
236
|
+
# with the per-line `<!-- ref-ignore -->` below; either suffices.
|
|
237
|
+
header_lines = text.splitlines()[:10]
|
|
238
|
+
if any(FILE_SKIP_MARKER in line for line in header_lines):
|
|
239
|
+
return broken
|
|
240
|
+
|
|
222
241
|
# Validate `personas:` frontmatter entries against known persona ids.
|
|
223
242
|
for line_no, pid in _extract_personas_frontmatter(text):
|
|
224
243
|
if pid not in artifacts["personas"]:
|
|
@@ -241,6 +260,12 @@ def check_file(filepath: Path, artifacts: dict[str, set[str]], root: Path) -> Li
|
|
|
241
260
|
if in_code_block:
|
|
242
261
|
continue
|
|
243
262
|
|
|
263
|
+
# Per-line opt-out: isolated forward-refs in otherwise checked
|
|
264
|
+
# documents (e.g. one ref to a planned skill, surrounded by
|
|
265
|
+
# valid refs). Skip the whole line's path / skill / rule checks.
|
|
266
|
+
if LINE_IGNORE_MARKER in line:
|
|
267
|
+
continue
|
|
268
|
+
|
|
244
269
|
# Unchecked TODO checkboxes document future work — their refs are
|
|
245
270
|
# forward-looking and will not resolve yet. Track multi-line bullets:
|
|
246
271
|
# any `- [ ]` opens a TODO context; a new top-level bullet, heading,
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""CI-time / local-edit-time ratio (council file 07, Phase 2.3).
|
|
3
|
+
|
|
4
|
+
Samples the last N commits on a branch, classifies each by touched
|
|
5
|
+
paths (doc / skill / test / meta / mixed), and computes:
|
|
6
|
+
|
|
7
|
+
ratio = ci_time / local_time
|
|
8
|
+
|
|
9
|
+
where:
|
|
10
|
+
- `local_time` = delta between author-date of the *previous* commit and
|
|
11
|
+
author-date of the current commit, capped at 60 min to filter breaks.
|
|
12
|
+
- `ci_time` = longest GitHub Actions workflow duration for that commit
|
|
13
|
+
sha (via `gh run list --commit <sha>`).
|
|
14
|
+
|
|
15
|
+
Threshold rule (Round-3 Sonnet protocol):
|
|
16
|
+
- Median ratio > 5× for any frequent class → that class needs a cheaper tier
|
|
17
|
+
- Median ratio < 3× across all classes → structural overhead acceptable
|
|
18
|
+
|
|
19
|
+
Output: human-readable table on stdout + JSON to
|
|
20
|
+
`agents/reports/ci-time-ratio.json`.
|
|
21
|
+
|
|
22
|
+
Usage:
|
|
23
|
+
python3 scripts/ci_time_ratio.py --limit 30
|
|
24
|
+
python3 scripts/ci_time_ratio.py --branch main --limit 30 --out path.json
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import argparse
|
|
30
|
+
import json
|
|
31
|
+
import statistics
|
|
32
|
+
import subprocess
|
|
33
|
+
import sys
|
|
34
|
+
from collections import defaultdict
|
|
35
|
+
from pathlib import Path
|
|
36
|
+
|
|
37
|
+
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
38
|
+
DEFAULT_OUT = REPO_ROOT / "agents" / "reports" / "ci-time-ratio.json"
|
|
39
|
+
|
|
40
|
+
LOCAL_TIME_CAP_S = 60 * 60 # cap a single edit window at 60 min
|
|
41
|
+
THRESHOLD_FAIL = 5.0
|
|
42
|
+
THRESHOLD_PASS = 3.0
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def run(cmd: list[str]) -> str:
    """Execute ``cmd`` from the repository root and return its stdout as text.

    Raises:
        subprocess.CalledProcessError: the command exited non-zero.
    """
    captured = subprocess.check_output(cmd, text=True, cwd=REPO_ROOT)
    return captured
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def list_commits(branch: str, limit: int) -> list[dict]:
    """Return up to ``limit + 1`` commits of ``branch`` in ``git log`` order.

    One extra commit is requested so that each of the ``limit`` sampled
    commits has a following entry to diff its author timestamp against.
    Each row holds ``sha``, ``timestamp`` (author date, epoch seconds) and
    ``subject``.
    """
    log_cmd = ["git", "log", branch, f"-n{limit + 1}",
               "--format=%H\t%at\t%s"]
    # maxsplit=2 keeps any tab characters inside the subject intact.
    split_lines = (entry.split("\t", 2) for entry in run(log_cmd).strip().splitlines())
    return [
        {"sha": sha, "timestamp": int(ts), "subject": subject}
        for sha, ts, subject in split_lines
    ]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def classify(sha: str) -> str:
    """Bucket a commit by the kind of files it touches.

    Returns:
        ``"empty"`` when ``git show`` lists no files for ``sha``;
        ``"skill"``, ``"test"``, ``"doc"`` or ``"meta"`` when at least
        70% of the touched files fall in that bucket; ``"mixed"``
        otherwise.

    A path can count toward several buckets (a Markdown file under a
    ``/skills/`` directory is both skill and doc). Buckets are tested in
    the order skill, test, doc, meta and the first one that reaches the
    threshold wins.
    """
    listing = run(["git", "show", "--name-only", "--format=", sha])
    files = [f for f in listing.strip().splitlines() if f]
    if not files:
        return "empty"

    total = len(files)
    meta_prefixes = ("Taskfile", "scripts/", ".github/", "pyproject", "package")
    counts = [
        ("skill", sum(1 for f in files
                      if "/skills/" in f or f.startswith(".agent-src.uncompressed/skills/"))),
        ("test", sum(1 for f in files if f.startswith("tests/") or "/tests/" in f)),
        ("doc", sum(1 for f in files if f.startswith("docs/") or f.endswith(".md"))),
        ("meta", sum(1 for f in files if f.startswith(meta_prefixes))),
    ]
    for label, n in counts:
        # Exact integer form of n / total >= 0.7. Flooring total * 0.7
        # would let 1 of 2 files (50%) or 2 of 3 (67%) pass as dominant.
        if 10 * n >= 7 * total:
            return label
    return "mixed"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def ci_duration_for(sha: str) -> int | None:
    """Return the longest completed GitHub Actions run for ``sha``, in seconds.

    Up to 20 runs are queried through ``gh run list --commit``. Each
    completed run's duration is ``updatedAt - createdAt``. The maximum is
    returned rather than the sum because workflows run in parallel.

    Returns:
        Whole seconds (truncated), or ``None`` when the ``gh`` call exits
        non-zero, no runs are listed, or none has status ``completed``.
    """
    from datetime import datetime

    def _parse(stamp: str) -> datetime:
        # Rewrite a trailing "Z" as an explicit UTC offset before parsing.
        return datetime.fromisoformat(stamp.replace("Z", "+00:00"))

    def _elapsed(item: dict) -> float:
        started = _parse(item["createdAt"])
        finished = _parse(item["updatedAt"])
        return (finished - started).total_seconds()

    try:
        raw = run(["gh", "run", "list", "--commit", sha, "--limit", "20",
                   "--json", "createdAt,updatedAt,status,conclusion"])
    except subprocess.CalledProcessError:
        return None

    listed = json.loads(raw)
    if not listed:
        return None

    spans = [_elapsed(item) for item in listed if item.get("status") == "completed"]
    if not spans:
        return None
    # Workflows run in parallel — wall-clock is the max, not the sum.
    return int(max(spans))
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def collect(branch: str, limit: int) -> list[dict]:
    """Build one sample row per usable commit on ``branch``.

    Each commit is paired with the next entry in the log. The gap between
    their author timestamps, capped at ``LOCAL_TIME_CAP_S``, is the local
    edit time. A commit is dropped when that gap is under 30 seconds
    (which also covers negative gaps) or when ``ci_duration_for`` has no
    CI duration for it.

    Returns:
        Rows with ``sha`` (first 10 chars), ``class``, ``local_s``,
        ``ci_s``, ``ratio`` (``ci_s / local_s`` rounded to 2 places) and
        ``subject`` (first 80 chars). Empty when fewer than two commits
        are available.
    """
    history = list_commits(branch, limit)
    if len(history) < 2:
        return []

    samples = []
    for commit, older in zip(history, history[1:]):
        edit_window = min(commit["timestamp"] - older["timestamp"], LOCAL_TIME_CAP_S)
        if edit_window < 30:
            continue
        ci_seconds = ci_duration_for(commit["sha"])
        if ci_seconds is None:
            continue
        kind = classify(commit["sha"])
        samples.append({
            "sha": commit["sha"][:10],
            "class": kind,
            "local_s": edit_window,
            "ci_s": ci_seconds,
            # edit_window is at least 30 here, so the division is safe.
            "ratio": round(ci_seconds / edit_window, 2),
            "subject": commit["subject"][:80],
        })
    return samples
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def summarise(rows: list[dict]) -> dict:
    """Aggregate sample rows into per-class and overall ratio statistics.

    Rows whose ``ratio`` is ``None`` are ignored.

    Returns:
        ``{"overall": ..., "by_class": ...}``.

        * ``by_class`` is keyed by class name in sorted order. Each entry
          has ``n``, ``median``, ``min``, ``max`` (rounded to 2 places)
          and a ``verdict``: ``"optimise"`` above ``THRESHOLD_FAIL``,
          ``"acceptable"`` below ``THRESHOLD_PASS``, else ``"watch"``.
        * ``overall`` has ``n``, ``median`` and a ``verdict``:
          ``"acceptable"`` below ``THRESHOLD_PASS``, else
          ``"needs-review"``. With no usable rows, ``median`` is ``None``
          and the verdict is ``"no-data"``.
    """
    def _class_verdict(median: float) -> str:
        if median > THRESHOLD_FAIL:
            return "optimise"
        if median < THRESHOLD_PASS:
            return "acceptable"
        return "watch"

    usable = [row for row in rows if row["ratio"] is not None]

    grouped: dict[str, list[float]] = defaultdict(list)
    for row in usable:
        grouped[row["class"]].append(row["ratio"])

    per_class = {}
    for name in sorted(grouped):
        values = grouped[name]
        mid = statistics.median(values)
        per_class[name] = {
            "n": len(values),
            "median": round(mid, 2),
            "min": round(min(values), 2),
            "max": round(max(values), 2),
            "verdict": _class_verdict(mid),
        }

    pooled = [row["ratio"] for row in usable]
    if pooled:
        pooled_median = statistics.median(pooled)
        overall = {
            "n": len(pooled),
            "median": round(pooled_median, 2),
            "verdict": "acceptable" if pooled_median < THRESHOLD_PASS else "needs-review",
        }
    else:
        overall = {"n": 0, "median": None, "verdict": "no-data"}

    return {"overall": overall, "by_class": per_class}
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def main() -> int:
    """CLI entry point: sample commits, write the JSON report, print a summary.

    Flags:
        --branch  revision passed to ``git log`` (default ``HEAD``)
        --limit   number of commits to sample (default 30)
        --out     report path (default ``DEFAULT_OUT``); parent
                  directories are created as needed

    Returns:
        ``0`` as the process exit code.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--branch", default="HEAD")
    p.add_argument("--limit", type=int, default=30)
    p.add_argument("--out", type=Path, default=DEFAULT_OUT)
    args = p.parse_args()

    rows = collect(args.branch, args.limit)
    report = summarise(rows)
    report["sample"] = rows

    args.out.parent.mkdir(parents=True, exist_ok=True)
    args.out.write_text(json.dumps(report, indent=2) + "\n")

    # `--out` may be relative or outside the repository; relative_to()
    # raises ValueError for both. Show the repo-relative form when there
    # is one, otherwise the path exactly as given.
    try:
        shown = args.out.resolve().relative_to(REPO_ROOT)
    except ValueError:
        shown = args.out
    print(f"✅  Wrote {shown} (n={report['overall']['n']})")

    ov = report["overall"]
    print(f"   overall median ratio: {ov['median']}× → {ov['verdict']}")
    for cls, s in report["by_class"].items():
        print(f"   {cls:7} n={s['n']:2}  median={s['median']:.2f}×  range=[{s['min']}–{s['max']}]  {s['verdict']}")
    return 0
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
if __name__ == "__main__":
|
|
168
|
+
sys.exit(main())
|