npm - @event4u/agent-config - Versions diffs - 1.16.0 → 1.17.0 - Mend

@event4u/agent-config 1.16.0 → 1.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (203) hide show

package/scripts/_one_off_phase4_dispatch_latency.py ADDED Viewed

@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+"""Phase 4.3.1 — council cluster dispatch-latency benchmark.
+Measures the wall-clock overhead of the cluster dispatch layer for the
+`/council` family. Compares:
+  baseline: directly read council-pr.md / council-design.md (atomic shape)
+  cluster : read council.md (dispatcher) + parse table + read council-pr.md
+            / council-design.md (cluster shape)
+The dispatch layer in agent-config is a markdown parse, not a runtime
+function, so this benchmarks the file-system + frontmatter + table-row
+extraction cost. Threshold per roadmap § 4.3.1: ≤ +100ms wall-clock.
+"""
+from __future__ import annotations
+import re
+import statistics
+import time
+from pathlib import Path
+# Two directory levels above this file.
+ROOT = Path(__file__).resolve().parent.parent
+# Directory holding the command markdown files that both probes read.
+COMMANDS = ROOT / ".agent-src/commands"
+N_ITER = 1000  # cold + warm; markdown is tiny so we run a lot of iterations
+# Frontmatter fenced by `---` lines at the very start of the text; DOTALL lets
+# the lazy group span several lines.
+FRONTMATTER_RE = re.compile(r"^---\n(.*?)\n---\n", re.DOTALL)
+# One routing-table row: captures the sub-command after `/council` and the
+# back-ticked content of the cell that follows it.
+TABLE_ROW_RE = re.compile(r"\|\s*`/council\s+([a-z-]+)`\s*\|\s*`([^`]+)`")
+def _read_atomic(target: str) -> str:
+    """Return the text of ``council-<target>.md``, read directly (atomic shape).
+    Baseline probe: a single file read followed by a frontmatter match, with
+    no dispatcher lookup. Raises ``RuntimeError`` when the file does not
+    begin with a frontmatter block.
+    """
+    source = COMMANDS / f"council-{target}.md"
+    content = source.read_text(encoding="utf-8")
+    if FRONTMATTER_RE.match(content) is None:
+        raise RuntimeError(f"no frontmatter in {source}")
+    return content
+def _read_cluster(target: str) -> str:
+    """Return the routed file's text, resolved through the ``council.md`` table.
+    Cluster probe: read the dispatcher, map each ``/council <sub>`` row to the
+    value of the cell that follows it, then read and frontmatter-check the
+    file that value names. Raises ``RuntimeError`` when ``target`` has no row
+    or the routed file lacks frontmatter.
+    """
+    table_text = (COMMANDS / "council.md").read_text(encoding="utf-8")
+    route_by_sub = {sub: cell for sub, cell in TABLE_ROW_RE.findall(table_text)}
+    if target not in route_by_sub:
+        raise RuntimeError(f"no route for {target!r} in dispatcher")
+    routed_name = route_by_sub[target]
+    content = (COMMANDS / routed_name).read_text(encoding="utf-8")
+    if FRONTMATTER_RE.match(content) is None:
+        raise RuntimeError(f"no frontmatter in {routed_name}")
+    return content
+def _bench(fn, target: str, n: int) -> list[float]:
+    """Call ``fn(target)`` ``n`` times and return each call's duration in ms.
+    Only the call itself sits between the two clock reads; list bookkeeping
+    happens after the second read.
+    """
+    clock = time.perf_counter
+    durations_ms: list[float] = []
+    for _ in range(n):
+        started = clock()
+        fn(target)
+        elapsed_s = clock() - started
+        durations_ms.append(elapsed_s * 1000.0)
+    return durations_ms
+def _summary(name: str, samples: list[float]) -> None:
+    """Print one line of latency statistics (mean, p50, p95, p99) in ms.
+    p95 and p99 are read from the sorted samples at the truncated index
+    ``int(len * q)``; no interpolation is applied.
+    """
+    ordered = sorted(samples)
+    count = len(ordered)
+    p50 = statistics.median(ordered)
+    p95 = ordered[int(count * 0.95)]
+    p99 = ordered[int(count * 0.99)]
+    mean = statistics.mean(ordered)
+    columns = (("mean", mean), ("p50", p50), ("p95", p95), ("p99", p99))
+    rendered = "  ".join(f"{label}={value:6.3f}ms" for label, value in columns)
+    print(f"  {name:18s}  {rendered}")
+def main() -> int:
+    """Benchmark both probes for each council target and print the verdict.
+    Returns 0 when every target's p95 delta (cluster minus atomic) is within
+    +100ms, otherwise 1.
+    """
+    limit_ms = 100.0
+    p95_index = int(N_ITER * 0.95)
+    print(f"Phase 4.3.1 — council cluster dispatch latency  (n={N_ITER} per probe)")
+    print()
+    overruns = 0
+    for target in ("pr", "design"):
+        print(f"target = /council {target}")
+        # One untimed call per probe so the timed loops start warm.
+        for probe in (_read_atomic, _read_cluster):
+            probe(target)
+        atomic_ms = _bench(_read_atomic, target, N_ITER)
+        cluster_ms = _bench(_read_cluster, target, N_ITER)
+        _summary("atomic   (baseline)", atomic_ms)
+        _summary("cluster  (dispatcher)", cluster_ms)
+        delta_mean = statistics.mean(cluster_ms) - statistics.mean(atomic_ms)
+        delta_p95 = sorted(cluster_ms)[p95_index] - sorted(atomic_ms)[p95_index]
+        if delta_p95 <= limit_ms:
+            marker, verdict = "✅", "PASS"
+        else:
+            marker, verdict = "❌", "FAIL"
+        print(f"  delta-mean = {delta_mean:+.3f}ms  delta-p95 = {delta_p95:+.3f}ms  threshold = +100ms  {marker} {verdict}")
+        print()
+        if delta_p95 > limit_ms:
+            overruns += 1
+    if not overruns:
+        print("✅  All probes within +100ms p95 threshold.")
+        return 0
+    print(f"❌  {overruns} probe(s) exceeded +100ms p95 threshold.")
+    return 1
+if __name__ == "__main__":
+    raise SystemExit(main())

package/scripts/_one_off_phase6_trigger_jaccard.py ADDED Viewed

@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+"""Phase 6.1 — chat-history-* trigger overlap (Jaccard).
+Source of truth per rule = frontmatter `description:` field
+(the trigger surface that decides when an `auto` rule activates).
+Tokens = lowercased alphanum words length ≥ 3, minus a small
+stop-list of file-name fragments and connective words that carry
+no trigger signal.
+Output: pairwise Jaccard + branch verdict per roadmap § 6.1.
+"""
+from __future__ import annotations
+import re
+from itertools import combinations
+from pathlib import Path
+# Two directory levels above this file.
+ROOT = Path(__file__).resolve().parent.parent
+# Directory the rule markdown files are read from.
+RULES_DIR = ROOT / ".agent-src.uncompressed/rules"
+# Rule ids compared pairwise; each maps to `<id>.md` under RULES_DIR.
+RULES = [
+    "chat-history-cadence",
+    "chat-history-ownership",
+    "chat-history-visibility",
+]
+# Tokens dropped before comparison (connectives and file-name fragments).
+STOP = {
+    "the", "and", "for", "with", "from", "via", "per", "not",
+    "into", "onto", "out", "off", "any", "all", "this", "that",
+    "agent", "chat", "history",
+    "agentchathistory", "chathistory",
+    "rule", "rules", "file", "files",
+}
+# Matches a `description:` line whose value is wrapped in double quotes; an
+# unquoted or single-quoted value does not match.
+DESC_RE = re.compile(r'^description:\s*"([^"]+)"', re.MULTILINE)
+# A token starts with a letter and continues with at least two characters
+# from [a-z0-9_], so underscores stay inside a token while hyphens split it.
+TOKEN_RE = re.compile(r"[a-z][a-z0-9_]{2,}")
+def tokens(rule_id: str) -> set[str]:
+    """Return the trigger tokens of one rule's frontmatter description.
+    Reads ``<rule_id>.md`` from RULES_DIR, lowercases the captured
+    description, extracts TOKEN_RE matches and removes every STOP word.
+    Raises ``RuntimeError`` when DESC_RE finds no description.
+    """
+    rule_path = RULES_DIR / f"{rule_id}.md"
+    found = DESC_RE.search(rule_path.read_text(encoding="utf-8"))
+    if found is None:
+        raise RuntimeError(f"no description in {rule_id}")
+    words = TOKEN_RE.findall(found.group(1).lower())
+    return set(words) - STOP
+def jaccard(a: set[str], b: set[str]) -> float:
+    """Return |a ∩ b| / |a ∪ b|, or 0.0 when both sets are empty."""
+    combined = a | b
+    return len(a & b) / len(combined) if combined else 0.0
+def main() -> int:
+    """Print each rule's token set, the pairwise Jaccard table and a verdict.
+    A pair counts as overlapping at a score of 0.30 or more. The verdict is
+    informational only: every branch returns 0.
+    """
+    threshold = 0.30
+    token_sets = {rule: tokens(rule) for rule in RULES}
+    print("Phase 6.1 — trigger Jaccard (source: frontmatter `description:`)")
+    print()
+    for rule, toks in token_sets.items():
+        print(f"  {rule}  ({len(toks)} tokens)")
+        print(f"    {sorted(toks)}")
+        print()
+    print("Pairwise Jaccard:")
+    print()
+    print(f"  {'pair':55s}  intersect  union  Jaccard")
+    pairs_above = 0
+    for left, right in combinations(RULES, 2):
+        left_set, right_set = token_sets[left], token_sets[right]
+        shared = left_set & right_set
+        combined = left_set | right_set
+        score = jaccard(left_set, right_set)
+        flagged = score >= threshold
+        label = left + " × " + right
+        suffix = " **" if flagged else ""
+        print(f"  {label:55s}  {len(shared):>8d}  {len(combined):>5d}  {score:>6.3f}{suffix}")
+        print(f"    intersection: {sorted(shared)}")
+        if flagged:
+            pairs_above += 1
+    print()
+    if pairs_above >= 2:
+        message = f"VERDICT: ≥ 30% on {pairs_above}/3 pairs → PROCEED to 6.2 (unified shape)."
+    elif pairs_above == 1:
+        message = f"VERDICT: mixed ({pairs_above}/3 pairs ≥ 30%) → ESCALATE to council."
+    else:
+        message = "VERDICT: < 30% on all 3 pairs → STOP at 6.1 (orthogonal — current shape optimal)."
+    print(message)
+    return 0
+if __name__ == "__main__":
+    raise SystemExit(main())

package/scripts/_phase2_shim_helper.py ADDED Viewed

@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+"""One-shot helper: add `superseded_by:` + `deprecated_in:` + warning
+banner to Phase 2 atomic-command shims.
+Idempotent — if a file already has `superseded_by:` set, it is skipped.
+"""
+from __future__ import annotations
+import sys
+from pathlib import Path
+# Each entry is (file-stem, replacement invocation without the leading slash).
+# The replacement is "<cluster> <sub>" for sub-command clusters and
+# "<cluster> --flag" for flag-based ones.
+PHASE2_SHIMS: list[tuple[str, str]] = [
+    # chat-history cluster
+    ("chat-history-resume",     "chat-history resume"),
+    ("chat-history-clear",      "chat-history clear"),
+    ("chat-history-checkpoint", "chat-history checkpoint"),
+    # agents cluster
+    ("agents-audit",   "agents audit"),
+    ("agents-cleanup", "agents cleanup"),
+    ("agents-prepare", "agents prepare"),
+    # memory cluster
+    ("memory-add",     "memory add"),
+    ("memory-full",    "memory load"),
+    ("memory-promote", "memory promote"),
+    ("propose-memory", "memory propose"),
+    # roadmap cluster
+    ("roadmap-create",  "roadmap create"),
+    ("roadmap-execute", "roadmap execute"),
+    # module cluster
+    ("module-create",  "module create"),
+    ("module-explore", "module explore"),
+    # tests cluster
+    ("tests-create",  "tests create"),
+    ("tests-execute", "tests execute"),
+    # context cluster
+    ("context-create",   "context create"),
+    ("context-refactor", "context refactor"),
+    # override cluster
+    ("override-create", "override create"),
+    ("override-manage", "override manage"),
+    # copilot-agents cluster
+    ("copilot-agents-init",     "copilot-agents init"),
+    ("copilot-agents-optimize", "copilot-agents optimize"),
+    # judge cluster (do-and-judge / do-in-steps now sub-commands)
+    ("do-and-judge", "judge on-diff"),
+    ("do-in-steps",  "judge steps"),
+    # commit / create-pr — flag-based clusters
+    ("commit-in-chunks",     "commit --in-chunks"),
+    ("create-pr-description", "create-pr --description-only"),
+]
+# Version written into each shim's `deprecated_in:` field and quoted in its banner.
+DEPRECATED_IN = "1.17.0"
+# NOTE(review): this is a relative path, so it resolves against the current
+# working directory rather than this script's location — presumably the
+# script is meant to be run from the package root; confirm.
+COMMANDS_DIR = Path(".agent-src.uncompressed/commands")
+def patch_file(stem: str, target: str) -> str:
+    """Mark one atomic-command shim as deprecated in favour of ``/<target>``.
+    Reads ``COMMANDS_DIR/<stem>.md``, appends ``superseded_by`` and
+    ``deprecated_in`` to its frontmatter, inserts a warning banner ahead of
+    the body, and writes the file back in place.
+    Args:
+        stem: File stem of the shim; also used as the old command name.
+        target: Replacement invocation without the leading slash, e.g.
+            ``"memory add"`` or ``"commit --in-chunks"``.
+    Returns:
+        A one-line status: ``OK   <stem> → <target>`` after a rewrite, or
+        ``SKIP <stem>: <reason>`` when the file was left untouched.
+    """
+    path = COMMANDS_DIR / f"{stem}.md"
+    if not path.exists():
+        return f"SKIP {stem}: not found"
+    text = path.read_text(encoding="utf-8")
+    # Coarse idempotency check on whatever sits between the first two `---`
+    # markers. It runs before the strict fence checks, so an already-shimmed
+    # file is reported as such even when its fences are not exactly "---\n".
+    if text.startswith("---") and "superseded_by:" in text.split("---", 2)[1]:
+        return f"SKIP {stem}: already shimmed"
+    if not text.startswith("---\n"):
+        return f"SKIP {stem}: no frontmatter"
+    fence = "\n---\n"
+    end = text.find(fence, 4)
+    if end == -1:
+        return f"SKIP {stem}: malformed frontmatter"
+    fm_block = text[4:end]
+    body = text[end + len(fence):]
+    # Precise idempotency check: the coarse one above stops at the first
+    # `---` it meets, which may sit inside a frontmatter value.
+    if "superseded_by:" in fm_block:
+        return f"SKIP {stem}: already shimmed"
+    new_fm = "\n".join(
+        [
+            *fm_block.rstrip("\n").splitlines(),
+            f"superseded_by: {target}",
+            f'deprecated_in: "{DEPRECATED_IN}"',
+        ]
+    )
+    # Sub-command targets ("memory add") and flag targets ("commit --in-chunks")
+    # are both invoked as "/<target>", so the banner needs no special-casing.
+    banner = (
+        f"> ⚠️  /{stem} is deprecated; use /{target} instead.\n"
+        f"> This shim is retained for one release cycle "
+        f"({DEPRECATED_IN} → next minor) and forwards to the same "
+        f"instructions below. See "
+        f"[`docs/contracts/command-clusters.md`]"
+        f"(../../docs/contracts/command-clusters.md).\n\n"
+    )
+    # Leading newlines are stripped from the body so the banner's own trailing
+    # blank line is the only gap between banner and body.
+    new_text = f"---\n{new_fm}\n---\n\n{banner}{body.lstrip(chr(10))}"
+    path.write_text(new_text, encoding="utf-8")
+    return f"OK   {stem} → {target}"
+def main() -> int:
+    """Patch every shim in PHASE2_SHIMS, then print one status line per shim.
+    All files are processed before anything is printed. Always returns 0,
+    including when some shims were skipped.
+    """
+    statuses = [patch_file(stem, target) for stem, target in PHASE2_SHIMS]
+    for status in statuses:
+        print(status)
+    return 0
+if __name__ == "__main__":
+    sys.exit(main())

package/scripts/agent-config CHANGED Viewed

@@ -70,6 +70,8 @@ Commands:
                              Usage: chat-history:hook --platform <claude|augment|cursor|cline|windsurf|gemini>
   chat-history:checkpoint    Append a phase-boundary entry to .agent-chat-history
                              (CHECKPOINT fallback for platforms without native hooks)
+  roadmap-progress:hook      PostToolUse hook entry point (read JSON from stdin)
+                             Regenerates roadmaps-progress.md when a tool wrote under agents/roadmaps/
   telemetry:record           Append one artefact-engagement event (default-off)
   telemetry:status           Print artefact-engagement telemetry status (read-only)
   telemetry:report           Aggregate the engagement log into a quartile report
@@ -316,6 +318,13 @@ cmd_chat_history_hook() {
   exec python3 "$script" hook-dispatch "$@"
 }
+# Handler for `roadmap-progress:hook`: resolves scripts/roadmap_progress_hook.py
+# and replaces this shell process with python3 running it, forwarding every
+# argument unchanged. Because of `exec`, the script inherits this process's
+# stdin. Returns 1 when resolve_script fails.
+cmd_roadmap_progress_hook() {
+  require_python3
+  local script
+  script="$(resolve_script "scripts/roadmap_progress_hook.py")" || return 1
+  exec python3 "$script" "$@"
+}
 cmd_chat_history_checkpoint() {
   require_python3
   local script
@@ -436,6 +445,7 @@ main() {
     refine-ticket:detect)    cmd_refine_ticket_detect "$@" ;;
     chat-history:hook)       cmd_chat_history_hook "$@" ;;
     chat-history:checkpoint) cmd_chat_history_checkpoint "$@" ;;
+    roadmap-progress:hook)   cmd_roadmap_progress_hook "$@" ;;
     telemetry:record)        cmd_telemetry_record "$@" ;;
     telemetry:status)        cmd_telemetry_status "$@" ;;
     telemetry:report)        cmd_telemetry_report "$@" ;;

package/scripts/ai_council/_one_off_2a4_acceptance.py ADDED Viewed

@@ -0,0 +1,208 @@
+"""Council acceptance review of Phase 0.4 2A.4 worked example.
+Purpose: Phase 0.4.3 of road-to-structural-optimization.md requires a
+council acceptance pass on the 2A.4 obligation-keyword-diff contract
+before Phase 2A may begin. The artefact lives at
+`agents/roadmaps/structural-optimization-2A4-example.md` plus two
+sandbox files. Status will move from `draft` to `locked` only on
+ACCEPT or ACCEPT_WITH_REVISIONS where revisions are minor.
+Invocation:
+    .venv/bin/python -m scripts.ai_council._one_off_2a4_acceptance
+"""
+from __future__ import annotations
+import sys
+from pathlib import Path
+from scripts.ai_council.bundler import bundle_files
+from scripts.ai_council.clients import (
+    AnthropicClient,
+    OpenAIClient,
+    load_anthropic_key,
+    load_openai_key,
+)
+from scripts.ai_council.orchestrator import (
+    CostBudget,
+    CouncilQuestion,
+    consult,
+    estimate,
+)
+from scripts.ai_council.pricing import estimate_cost, load_prices
+from scripts.ai_council.project_context import detect_project_context
+from scripts.ai_council.session import SessionManifest, save as save_session
+# Directory two levels above this file's own directory.
+REPO_ROOT = Path(__file__).resolve().parents[2]
+# Files bundled into the council prompt: the worked-example report followed by
+# the two sandbox files it refers to.
+ARTEFACTS = [
+    REPO_ROOT / "agents/roadmaps/structural-optimization-2A4-example.md",
+    REPO_ROOT / "agents/roadmaps/examples/2A4-direct-answers/direct-answers.slim.md",
+    REPO_ROOT / "agents/roadmaps/examples/2A4-direct-answers/direct-answers-mechanics.md",
+]
+# Passed as `original_ask` to estimate() and consult(), and stored in the
+# session manifest.
+ORIGINAL_ASK = (
+    "Phase 0.4 of road-to-structural-optimization v3.1 dry-runs the "
+    "2A.4 obligation-keyword-diff contract on `direct-answers.md` to "
+    "lock the contract before Phase 2A begins. The artefact and two "
+    "sandbox files (slim rule + extracted mechanics) are presented. "
+    "Council task: ACCEPT / ACCEPT_WITH_REVISIONS / REJECT the "
+    "contract for use across the remaining 8 always-rules in Phase 2A."
+)
+# Review instructions placed ahead of the bundled artefacts in the user prompt.
+# A backslash at the end of a line inside the literal is a line continuation:
+# the newline that follows it is not part of the prompt text.
+REVIEW_PROMPT = """\
+# Council Acceptance Review — 2A.4 Worked Example
+## Context
+The host agent ran Phase 0.4 of `road-to-structural-optimization` v3.1: \
+took one always-rule (`direct-answers.md`, smallest of the top-3), split \
+it into a slim RULE+LOGIC half and a MECHANICS context, then applied \
+the 2A.4 obligation-keyword diff contract. The artefact is the report \
+of that dry-run; the two sandbox files are the actual produced split.
+You are not asked to re-litigate the v3.1 roadmap or the choice of \
+`direct-answers.md` — both were settled in earlier rounds. Verdict \
+solely concerns whether the **contract** (keyword × counts × \
+accept-rationale table, plus its tie-break rules) is now ready to be \
+applied to the remaining 8 always-rules in Phase 2A.
+## Output Contract (STRICT)
+Produce exactly these blocks in order. Be decisive — total response \
+budget <= 1200 words.
+```
+### Contract correctness
+**Verdict:** <ACCEPT | ACCEPT_WITH_REVISIONS | REJECT>
+**Keyword extraction completeness:** <COMPLETE | PARTIAL — list missing>
+**Tie-break rules sufficient:** <YES | NO — name the gap>
+**Required revisions (numbered, 1-3 max, only on ACCEPT_WITH_REVISIONS):**
+  1. <one sentence — smallest change>
+  2. <...>
+  3. <...>
+```
+```
+### Sandbox split quality
+**Slim file preserves all RULE+LOGIC obligations:** <YES | NO — list lost>
+**Mechanics file holds only mechanics+examples:** <YES | NO — list misplaced>
+**Round-trip: rule_slim + load_context(mechanics) == original behaviour:**
+  <YES | NO — name the divergence>
+```
+```
+### Generalisability to remaining 8 rules
+**Contract scales without per-rule tuning:** <YES | NO — name failure mode>
+**Single biggest risk on the next rule (likely `non-destructive-by-default`):**
+  <one sentence>
+```
+```
+### Final verdict
+**Lockable as-is for Phase 2A?** <YES | NO>
+**If NO, single blocking change required:** <one sentence>
+```
+Verdict definitions:
+- **ACCEPT** — contract ships unchanged; status moves to locked.
+- **ACCEPT_WITH_REVISIONS** — locks after the 1-3 listed revisions land.
+- **REJECT** — contract is structurally wrong; describe the fault.
+The three artefacts follow this prompt verbatim.
+"""
+# Runs one council round over ARTEFACTS, prints estimated and actual cost, and
+# saves the session. Returns 0 only when every response succeeded; returns 1
+# when no round completed, when the final round has no successful response,
+# or when any response in any round carried an error.
+def main() -> int:
+    anthropic = AnthropicClient(api_key=load_anthropic_key(), model="claude-sonnet-4-5")
+    openai = OpenAIClient(api_key=load_openai_key(), model="gpt-4o")
+    members = [anthropic, openai]
+    context = bundle_files(ARTEFACTS)
+    project = detect_project_context(REPO_ROOT)
+    table = load_prices()
+    # Review instructions first, then the bundled artefacts after a separator.
+    user_prompt = REVIEW_PROMPT + "\n\n---\n\n" + context.text
+    question = CouncilQuestion(
+        mode="files",
+        user_prompt=user_prompt,
+        max_tokens=3072,
+    )
+    estimates = estimate(
+        question, members, table, project=project, original_ask=ORIGINAL_ASK,
+    )
+    print("=== ESTIMATE (single round, max tokens) ===")
+    total_est = 0.0
+    # Pairs each member with its estimate by position.
+    for c, e in zip(members, estimates):
+        print(f"  {c.name}/{c.model}: ~{e.input_tokens} in + {e.output_tokens} out = ${e.total_usd:.4f}")
+        total_est += e.total_usd
+    print(f"  TOTAL per round (max): ${total_est:.4f}")
+    print()
+    budget = CostBudget(
+        max_input_tokens=200_000,
+        max_output_tokens=80_000,
+        max_calls=20,
+        max_total_usd=2.50,
+    )
+    # Filled by the callback below, one list of responses per round.
+    rounds_collected: list[list] = []
+    # Stores the round's responses and prints a status line for each of them.
+    def _on_round_complete(round_idx: int, round_responses) -> None:
+        rounds_collected.append(list(round_responses))
+        print(f"=== ROUND {round_idx + 1} COMPLETE ===")
+        for r in round_responses:
+            if r.error:
+                print(f"  [error] {r.provider}/{r.model}: {r.error}")
+                continue
+            actual = estimate_cost(r.provider, r.model, r.input_tokens, r.output_tokens, table)
+            print(f"  [done] {r.provider}/{r.model}: {r.input_tokens} in / "
+                  f"{r.output_tokens} out · {r.latency_ms} ms · ${actual.total_usd:.4f}")
+        print()
+    print("=== CONSULT (1 round, 2A.4 acceptance review) ===")
+    # The return value is not used; results are read from rounds_collected.
+    consult(
+        members, question, budget,
+        rounds=1,
+        on_round_complete=_on_round_complete,
+        table=table, project=project, original_ask=ORIGINAL_ASK,
+    )
+    if not rounds_collected:
+        print("[error] no rounds completed", file=sys.stderr)
+        return 1
+    # Actual spend counts successful responses only.
+    actual_total = 0.0
+    for round_responses in rounds_collected:
+        for r in round_responses:
+            if r.error:
+                continue
+            actual = estimate_cost(r.provider, r.model, r.input_tokens, r.output_tokens, table)
+            actual_total += actual.total_usd
+    print(f"=== TOTAL ACTUAL: ${actual_total:.4f} ===")
+    final_round = rounds_collected[-1]
+    # Every response in the final round failed: exit without saving a session.
+    # NOTE(review): this path prints no message of its own.
+    if not [r for r in final_round if not r.error]:
+        return 1
+    manifest = SessionManifest(
+        mode="files",
+        artefact="agents/roadmaps/structural-optimization-2A4-example.md",
+        original_ask=ORIGINAL_ASK,
+        members=[f"{r.provider}/{r.model}" for r in final_round],
+        rounds=len(rounds_collected),
+        cost_usd_estimated=total_est,
+        cost_usd_actual=actual_total,
+        extra={"purpose": "Council acceptance review of Phase 0.4 2A.4 worked example"},
+    )
+    session_dir = save_session(manifest=manifest, responses=rounds_collected)
+    # relative_to raises ValueError if session_dir is not located under REPO_ROOT.
+    print(f"[saved] {session_dir.relative_to(REPO_ROOT)}/")
+    # The session is saved even when some member failed, but the exit status still reports it.
+    return 1 if any(r.error for round_r in rounds_collected for r in round_r) else 0
+if __name__ == "__main__":
+    raise SystemExit(main())

package/scripts/ai_council/_one_off_context_layer_v1_estimate.py ADDED Viewed

@@ -0,0 +1,67 @@
+"""One-shot estimator for the v1 council review (no consult call).
+Sibling of `_one_off_context_layer_v1_review.py`. Prints bundle size and
+per-model token / cost projection so the user can confirm spend before
+the actual consult fires.
+"""
+from __future__ import annotations
+from pathlib import Path
+from scripts.ai_council._one_off_context_layer_v1_review import (
+    ORIGINAL_ASK,
+    REVIEW_PROMPT_HEADER,
+    ROADMAP_PATH,
+    _diff_stat,
+    _pr_body,
+)
+from scripts.ai_council.bundler import bundle_prompt
+from scripts.ai_council.clients import (
+    AnthropicClient,
+    OpenAIClient,
+    load_anthropic_key,
+    load_openai_key,
+)
+from scripts.ai_council.orchestrator import CouncilQuestion, estimate
+from scripts.ai_council.pricing import load_prices
+from scripts.ai_council.project_context import detect_project_context
+# Directory two levels above this file's own directory.
+REPO_ROOT = Path(__file__).resolve().parents[2]
+def main() -> int:
+    """Print the bundle size and each member's projected cost; no consult call.
+    Joins the review header, the PR #36 diff stat, the PR body and the roadmap
+    text into one bundle, prints its UTF-8 byte size, then prints one estimate
+    line per council member and the summed total. Always returns 0.
+    """
+    roadmap_text = ROADMAP_PATH.read_text(encoding="utf-8")
+    diff_section = "## PR #36 — diff --stat\n\n```\n" + _diff_stat() + "\n```"
+    body_section = "## PR #36 — body\n\n" + _pr_body()
+    roadmap_section = "## Roadmap v1\n\n" + roadmap_text
+    separator = "\n\n---\n\n"
+    bundle_text = separator.join(
+        (REVIEW_PROMPT_HEADER, diff_section, body_section, roadmap_section)
+    )
+    bundle_bytes = len(bundle_text.encode("utf-8"))
+    print(f"Bundle bytes: {bundle_bytes}")
+    ctx = bundle_prompt(bundle_text)
+    project = detect_project_context(REPO_ROOT)
+    table = load_prices()
+    members = [
+        AnthropicClient(api_key=load_anthropic_key(), model="claude-sonnet-4-5"),
+        OpenAIClient(api_key=load_openai_key(), model="gpt-4o"),
+    ]
+    question = CouncilQuestion(mode="prompt", user_prompt=ctx.text, max_tokens=4096)
+    estimates = estimate(
+        question, members, table, project=project, original_ask=ORIGINAL_ASK,
+    )
+    running_total = 0.0
+    for member, projection in zip(members, estimates):
+        print(f"  {member.name}/{member.model}: ~{projection.input_tokens} in + {projection.output_tokens} out = ${projection.total_usd:.4f}")
+        running_total += projection.total_usd
+    print(f"  TOTAL (max, single round): ${running_total:.4f}")
+    return 0
+if __name__ == "__main__":
+    raise SystemExit(main())