npm - @seanyao/roll - Versions diffs - 2026.529.5 → 2026.601.2 - Mend

@seanyao/roll 2026.529.5 → 2026.601.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59)

package/CHANGELOG.md +57 -25
package/README.md +10 -7
package/bin/roll +3952 -317
package/conventions/config.yaml +7 -0
package/lib/__pycache__/github_sync.cpython-314.pyc +0 -0
package/lib/__pycache__/loop_result_eval.cpython-314.pyc +0 -0
package/lib/__pycache__/model_prices.cpython-314.pyc +0 -0
package/lib/__pycache__/roll-home.cpython-314.pyc +0 -0
package/lib/__pycache__/roll-loop-status.cpython-314.pyc +0 -0
package/lib/__pycache__/roll_git.cpython-314.pyc +0 -0
package/lib/__pycache__/slides-render.cpython-314.pyc +0 -0
package/lib/agent_usage/__init__.py +4 -0
package/lib/agent_usage/__pycache__/__init__.cpython-314.pyc +0 -0
package/lib/agent_usage/__pycache__/gemini.cpython-314.pyc +0 -0
package/lib/agent_usage/__pycache__/kimi.cpython-314.pyc +0 -0
package/lib/agent_usage/__pycache__/openai.cpython-314.pyc +0 -0
package/lib/agent_usage/__pycache__/qwen.cpython-314.pyc +0 -0
package/lib/agent_usage/gemini.py +127 -0
package/lib/agent_usage/kimi.py +127 -0
package/lib/agent_usage/openai.py +126 -0
package/lib/agent_usage/qwen.py +128 -0
package/lib/context_feed_budget.sh +194 -0
package/lib/github_sync.py +876 -0
package/lib/i18n/agent.sh +54 -0
package/lib/i18n/init.sh +22 -0
package/lib/i18n/peer.sh +7 -0
package/lib/i18n/peer_help.sh +4 -0
package/lib/i18n/skills_catalog.sh +30 -0
package/lib/loop-exit-summary.py +393 -0
package/lib/loop-fmt.py +93 -75
package/lib/loop_pick_agent.py +241 -170
package/lib/loop_result_eval.py +469 -0
package/lib/model_prices.py +0 -10
package/lib/roll-home.py +1 -28
package/lib/roll-loop-status.py +330 -40
package/lib/roll-onboard-render.py +378 -0
package/lib/roll-peer.py +1 -1
package/lib/roll-plan-validate.py +165 -0
package/lib/roll_git.py +41 -0
package/lib/slides/components/README.md +8 -2
package/lib/slides/templates/introduction-v3.html +1 -6
package/lib/slides-render.py +305 -15
package/lib/slides-validate.py +195 -7
package/package.json +1 -1
package/skills/roll-.changelog/SKILL.md +67 -56
package/skills/roll-brief/SKILL.md +1 -1
package/skills/roll-build/SKILL.md +14 -12
package/skills/roll-deck/SKILL.md +152 -0
package/skills/roll-design/SKILL.md +13 -6
package/skills/roll-doc/SKILL.md +269 -6
package/skills/roll-fix/SKILL.md +15 -9
package/skills/roll-loop/SKILL.md +9 -7
package/skills/roll-notes/SKILL.md +1 -1
package/skills/roll-onboard/SKILL.md +85 -0
package/skills/roll-peer/SKILL.md +6 -5
package/lib/agent_routes_lint.py +0 -203
package/skills/roll-research/SKILL.md +0 -316
package/skills/roll-research/references/schema.json +0 -166
package/skills/roll-research/scripts/md_to_pdf.py +0 -289

package/lib/loop_pick_agent.py CHANGED

@@ -1,46 +1,189 @@
 #!/usr/bin/env python3
-"""Pick a routing agent for a backlog story (US-AGENT-004).
+"""Classify a backlog story into a complexity tier (US-AGENT-022).
-Reads story metadata from the feature markdown (linked from the BACKLOG row)
-and matches it against agent-routes.yaml hard rules. Emits a single line on
-stdout:
+Supersedes the three-dimensional (type/est_min/risk_zone) hard-rule matcher
+and the history-driven soft preference (US-AGENT-004/005). Routing now turns
+on a single axis: the story's ``est_min`` estimate maps to one of three
+complexity tiers — ``easy`` / ``default`` / ``hard``. The tier → agent
+resolution (reading ``agents.yaml`` slots, fallback) lands in US-AGENT-023;
+this module is the pure classifier.
-    <agent> <rule_kind> <rationale>
+Emits a single line on stdout::
+    <tier> <rationale>
+where ``tier`` is one of ``easy`` / ``default`` / ``hard``.
+Tier boundaries (centralised constants, intentionally NOT user-configurable
+to keep routing variance to a single axis):
+    est_min <= 8           → easy
+    8 < est_min <= 20      → default
+    est_min > 20           → hard
+    missing / illegal est  → default
 Exit codes:
-  0 — agent picked (rule_kind in {hard, default})
-  1 — story id not found / unrecoverable error
+  0 — tier classified (always succeeds once the story is found)
+  1 — story id not found in backlog / unrecoverable error
 Usage:
-  loop_pick_agent.py --story-id US-AGENT-004 \\
-                     --backlog .roll/backlog.md \\
-                     --routes  .roll/agent-routes.yaml
-History-driven soft preference (US-AGENT-005) lands on top of this in a
-later commit; the present module only implements hard-rule selection.
+  loop_pick_agent.py --story-id US-AGENT-022 --backlog .roll/backlog.md
+  loop_pick_agent.py --est-min 12          # classify a bare estimate
 """
 from __future__ import annotations
 import argparse
-import json
 import re
 import sys
 from pathlib import Path
-try:
-    import yaml
-except ImportError:
-    print("loop_pick_agent: PyYAML not installed", file=sys.stderr)
-    sys.exit(2)
+# Complexity-tier boundaries. Single source of truth — change here only.
+EASY_MAX_MIN = 8        # est_min <= 8        → easy
+HARD_MIN_MIN = 20       # est_min >  20       → hard
+TIER_EASY = "easy"
+TIER_DEFAULT = "default"
+TIER_HARD = "hard"
 PROFILE_BLOCK_RE = re.compile(r"\*\*Agent profile:\*\*")
 EST_RE = re.compile(r"^\s*-\s*est_min:\s*(\d+)")
-RISK_RE = re.compile(r"^\s*-\s*risk_zone:\s*([a-zA-Z]+)")
-CHAIN_RE = re.compile(r"^\s*-\s*chain_depth:\s*(\d+)")
 ANCHOR_TEMPLATE = '<a id="{anchor}"></a>'
+def _classify_complexity(est_min) -> str:
+    """Map an ``est_min`` estimate onto a complexity tier.
+    ``<= 8`` → easy, ``> 20`` → hard, ``8 < x <= 20`` → default. A missing,
+    negative, or non-integer estimate (None, "", non-numeric text, a fractional
+    or non-finite number) falls back to ``default``.
+    Args:
+      est_min: the estimate in minutes; an int, an integer-valued float, or a
+               string holding an integer (as passed by ``--est-min``).
+    Returns:
+      One of ``TIER_EASY`` / ``TIER_DEFAULT`` / ``TIER_HARD``. Never raises.
+    """
+    if est_min is None:
+        return TIER_DEFAULT
+    # int() would silently truncate a fractional float (8.9 → 8 → easy), which
+    # contradicts the "non-integer → default" contract. NaN and ±inf are also
+    # rejected here because float.is_integer() is False for them.
+    if isinstance(est_min, float) and not est_min.is_integer():
+        return TIER_DEFAULT
+    try:
+        n = int(est_min)
+    except (TypeError, ValueError, OverflowError):
+        # OverflowError: non-finite numeric types (e.g. Decimal("Infinity")).
+        return TIER_DEFAULT
+    if n < 0:
+        # Negative estimate is invalid data → treat like missing.
+        return TIER_DEFAULT
+    if n <= EASY_MAX_MIN:
+        return TIER_EASY
+    if n > HARD_MIN_MIN:
+        return TIER_HARD
+    return TIER_DEFAULT
+# ─────────────────────────────────────────────────────────────────────────────
+# US-AGENT-030: transparent, auditable in-tier soft nudge.
+#
+# The complexity tier (easy/default/hard from ``_classify_complexity``) is a
+# HARD constraint — it decides which agents.yaml slot is consulted and a task is
+# NEVER moved out of its tier. On top of that hard floor this adds a SOFT
+# priority: among the candidate agents already associated with this tier, prefer
+# the one with the best per-(agent × story_type) historical hit-rate.
+#
+# How this differs from the US-AGENT-022-retired soft preference (the whole
+# point of the story):
+#   - deterministic: same history in → same agent out. No rng, no time seed,
+#     no decay clock. ``nudge_within_tier`` is a pure function of its arguments.
+#   - auditable: every decision returns a human-readable rationale string that
+#     the caller logs into runs.jsonl + the event log.
+#   - sample floor: a (agent, story_type) combo below ``sample_floor`` does not
+#     participate; the slot agent is kept and the audit line says so.
+#   - one switch: ``enabled=False`` makes this an exact identity — it returns
+#     the slot agent unchanged, behaving precisely like US-AGENT-023.
+# ─────────────────────────────────────────────────────────────────────────────
+# Default minimum samples a (agent × story_type) combo needs before its hit-rate
+# is allowed to influence routing. Below this the combo is statistically
+# meaningless, so we keep the operator's slot choice. Centralised constant.
+SAMPLE_FLOOR = 8
+def nudge_within_tier(slot_agent, candidates, story_type, hit_rates,
+                      sample_floor=SAMPLE_FLOOR, enabled=True):
+    """Reorder same-tier candidates by historical hit-rate; return the winner.
+    Pure function — no I/O, no randomness, no clock. Given the same arguments
+    it always returns the same ``(chosen_agent, rationale)`` pair.
+    Malformed history never raises: a ``hit_rates`` value that is not a mapping,
+    a per-combo entry that is not a mapping, and unparseable or non-finite
+    numbers are all treated as "no data" (hit_rate 0.0 / sample_n 0).
+    Args:
+      slot_agent:   the agent the est_min tier slot resolved to (the hard-floor
+                    default). Always the fallback / tie-break winner.
+      candidates:   iterable of in-tier candidate agent names (already
+                    constrained to this tier + installed by the caller). The
+                    slot agent is folded in even if absent.
+      story_type:   the story's type bucket (e.g. "US" / "FIX"); the hit-rate is
+                    looked up per (agent, story_type).
+      hit_rates:    {"<agent>\\x1f<story_type>": {"hit_rate": float,
+                    "sample_n": int}}  (the loop_result_eval read model).
+      sample_floor: combos with sample_n < this are ignored (default 8).
+      enabled:      when False, returns (slot_agent, "<reason: disabled>") with
+                    no reordering — exact US-AGENT-023 behaviour.
+    Returns:
+      (chosen_agent, rationale) where rationale is a one-line audit string.
+    """
+    if not slot_agent:
+        return (slot_agent, "no slot agent; nudge skipped")
+    if not enabled:
+        return (slot_agent, "nudge disabled; keeping est_min slot %s" % slot_agent)
+    # Build the candidate set: the slot agent is always in the running, plus any
+    # caller-supplied in-tier candidates. De-dup but keep a deterministic order
+    # (slot agent first, then the rest sorted) so iteration is reproducible.
+    seen = {slot_agent}
+    rest = []
+    for c in (candidates or []):
+        if c and c not in seen:
+            seen.add(c)
+            rest.append(c)
+    ordered = [slot_agent] + sorted(rest)
+    def _mapping_or_empty(obj):
+        # The read model arrives as decoded JSON, so it may be a list, number
+        # or string when the producer misbehaves. Anything without a ``.get``
+        # is treated as "no history" instead of raising AttributeError.
+        return obj if callable(getattr(obj, "get", None)) else {}
+    rates = _mapping_or_empty(hit_rates)
+    def _stat(agent):
+        key = "%s\x1f%s" % (agent, story_type)
+        st = _mapping_or_empty(rates.get(key))
+        try:
+            n = int(st.get("sample_n", 0))
+        except (TypeError, ValueError, OverflowError):
+            n = 0
+        try:
+            hr = float(st.get("hit_rate", 0.0))
+        except (TypeError, ValueError, OverflowError):
+            hr = 0.0
+        # json.loads accepts NaN/Infinity. NaN compares unordered, so it would
+        # make the sort below keep whatever order it started in and could crown
+        # a meaningless "best" agent. Non-finite rates count as no signal.
+        if hr != hr or hr in (float("inf"), float("-inf")):
+            hr = 0.0
+        return hr, n
+    # Eligible = combos that clear the sample floor.
+    eligible = []
+    for a in ordered:
+        hr, n = _stat(a)
+        if n >= sample_floor:
+            eligible.append((a, hr, n))
+    if not eligible:
+        return (slot_agent,
+                "n<%d for all %s candidates in this tier; keeping slot %s"
+                % (sample_floor, story_type, slot_agent))
+    # Best hit-rate wins. Deterministic tie-break: the slot agent first (it is
+    # always index 0 in ``ordered``), then the earliest candidate in the stable
+    # order. Sort by (-hit_rate, ordered_index) so ties never depend on dict
+    # iteration or locale.
+    index_of = {a: i for i, a in enumerate(ordered)}
+    eligible.sort(key=lambda t: (-t[1], index_of[t[0]]))
+    best_agent, best_hr, best_n = eligible[0]
+    if best_agent == slot_agent:
+        return (slot_agent,
+                "%s best for %s in-tier (hit_rate %.2f, n=%d); slot kept"
+                % (slot_agent, story_type, best_hr, best_n))
+    # The slot agent's own numbers are only needed for the "prefer" audit line.
+    slot_hr, slot_n = _stat(slot_agent)
+    return (best_agent,
+            "%s in-tier hit_rate %.2f (n=%d) > slot %s %.2f (n=%d) for %s -> prefer %s"
+            % (best_agent, best_hr, best_n, slot_agent, slot_hr, slot_n,
+               story_type, best_agent))
 def _id_to_anchor(story_id: str) -> str:
     return story_id.lower()
@@ -53,19 +196,19 @@ def _find_feature_md(backlog_path: Path, story_id: str) -> Path | None:
         r"\[" + re.escape(story_id) + r"\]\((\.roll/features/[^)]+?)#",
         re.IGNORECASE,
     )
-    for line in backlog_path.read_text().splitlines():
+    for line in backlog_path.read_text(encoding="utf-8").splitlines():
         m = link_re.search(line)
         if m:
             return Path(m.group(1))
     return None
-def _read_profile(feature_md: Path, story_id: str) -> dict | None:
-    """Return {est_min, risk_zone, chain_depth} or None if not found."""
+def _read_est_min(feature_md: Path, story_id: str):
+    """Return the story's est_min as an int, or None if not found."""
     if not feature_md.exists():
         return None
     anchor = ANCHOR_TEMPLATE.format(anchor=_id_to_anchor(story_id))
-    text = feature_md.read_text()
+    text = feature_md.read_text(encoding="utf-8")
     if anchor not in text:
         return None
@@ -78,166 +221,94 @@ def _read_profile(feature_md: Path, story_id: str) -> dict | None:
     if not PROFILE_BLOCK_RE.search(section):
         return None
-    profile: dict[str, object] = {}
     for line in section.splitlines():
         m = EST_RE.match(line)
         if m:
-            profile["est_min"] = int(m.group(1))
-            continue
-        m = RISK_RE.match(line)
-        if m:
-            profile["risk_zone"] = m.group(1).lower()
-            continue
-        m = CHAIN_RE.match(line)
-        if m:
-            profile["chain_depth"] = int(m.group(1))
-    if "est_min" not in profile or "risk_zone" not in profile:
-        return None
-    profile.setdefault("chain_depth", 0)
-    return profile
-def _story_type(story_id: str) -> str:
-    # Story id prefix → routing type. US-AGENT-004 → "US", FIX-* → "FIX",
-    # REFACTOR-* → "REFACTOR". Default falls through to "US".
-    prefix = story_id.split("-", 1)[0].upper()
-    return prefix if prefix in {"FIX", "US", "REFACTOR"} else "US"
-def _agent_matches(agent_cfg: dict, story_type: str, est_min: int, risk_zone: str) -> bool:
-    types = agent_cfg.get("types") or []
-    if story_type not in types:
-        return False
-    est_range = agent_cfg.get("est_min") or {}
-    lo = est_range.get("min")
-    hi = est_range.get("max")
-    if lo is not None and est_min < lo:
-        return False
-    if hi is not None and est_min > hi:
-        return False
-    risk_list = agent_cfg.get("risk") or []
-    if risk_zone not in risk_list:
-        return False
-    return True
-def _hit_rates(runs_path: Path, story_type: str, window: int) -> dict[str, tuple[int, int]]:
-    """Return {agent: (built_count, total_count)} for the requested story type
-    over the last `window` runs.jsonl records that targeted that type. Records
-    must carry `agent` and `story_type` (forward-looking schema, US-AGENT-005).
-    Older records lacking these fields are skipped silently.
-    """
-    rates: dict[str, list[int]] = {}
-    if window <= 0 or not runs_path.exists():
-        return {}
-    # Read all then take last N matching story_type.
-    matching: list[dict] = []
-    for line in runs_path.read_text().splitlines():
-        line = line.strip()
-        if not line:
-            continue
-        try:
-            rec = json.loads(line)
-        except ValueError:
-            continue
-        if rec.get("story_type") != story_type:
-            continue
-        if "agent" not in rec:
-            continue
-        matching.append(rec)
-    for rec in matching[-window:]:
-        agent = rec["agent"]
-        slot = rates.setdefault(agent, [0, 0])
-        slot[1] += 1
-        if rec.get("status") == "built":
-            slot[0] += 1
-    return {a: (b, t) for a, (b, t) in rates.items()}
-def pick(story_id: str, backlog_path: Path, routes_path: Path,
-         runs_path: Path | None = None) -> tuple[str, str, str] | None:
-    """Return (agent, rule_kind, rationale) or None on hard error."""
-    if not routes_path.exists():
-        return None
-    routes = yaml.safe_load(routes_path.read_text()) or {}
-    agents = routes.get("agents") or {}
-    history = routes.get("history") or {}
-    cold = history.get("cold_start_default") or next(iter(agents), None)
-    window = int(history.get("window_cycles", 0) or 0)
-    threshold = float(history.get("prefer_threshold", 0.0) or 0.0)
+            return int(m.group(1))
+    return None
+def classify_story(story_id: str, backlog_path: Path) -> tuple[str, str] | None:
+    """Return (tier, rationale) for a backlog story, or None on hard error."""
     feature_md = _find_feature_md(backlog_path, story_id)
     if feature_md is None:
         return None  # story id not in backlog
-    profile = _read_profile(feature_md, story_id)
-    if profile is None:
-        if cold is None:
-            return None
-        return (cold, "default", f"no profile for {story_id}; fell back to cold_start_default")
-    story_type = _story_type(story_id)
-    est_min = profile["est_min"]
-    risk_zone = profile["risk_zone"]
-    # Hard-rule candidate set in declaration order.
-    matched: list[str] = []
-    for name, cfg in agents.items():
-        if _agent_matches(cfg or {}, story_type, est_min, risk_zone):
-            matched.append(name)
-    if not matched:
-        if cold is None:
-            return None
-        return (cold, "default", f"no agent matched {story_type}/{est_min}/{risk_zone}; cold_start_default")
-    # Single match → no soft pref needed.
-    if len(matched) == 1 or runs_path is None or window <= 0:
-        chosen = matched[0]
-        rationale = f"hard: type={story_type} est={est_min} risk={risk_zone} matched {chosen}"
-        return (chosen, "hard", rationale)
-    # Multiple matches → consider history soft preference.
-    rates = _hit_rates(runs_path, story_type, window)
-    # Filter rates to candidates only, require sample ≥ 5 and rate ≥ threshold.
-    eligible = []
-    for cand in matched:
-        built, total = rates.get(cand, (0, 0))
-        if total >= 5:
-            rate = built / total if total else 0.0
-            if rate >= threshold:
-                eligible.append((rate, cand))
-    if eligible:
-        eligible.sort(reverse=True)  # highest rate first
-        rate, chosen = eligible[0]
-        rationale = (
-            f"soft: type={story_type} est={est_min} risk={risk_zone} "
-            f"history_rate={rate:.2f} (threshold={threshold}) matched {chosen}"
-        )
-        return (chosen, "soft", rationale)
-    # Fallback to hard-rule first.
-    chosen = matched[0]
-    rationale = f"hard: type={story_type} est={est_min} risk={risk_zone} matched {chosen} (no eligible history)"
-    return (chosen, "hard", rationale)
+    est_min = _read_est_min(feature_md, story_id)
+    tier = _classify_complexity(est_min)
+    if est_min is None:
+        rationale = f"no est_min for {story_id}; tier={tier} (default)"
+    else:
+        rationale = f"est_min={est_min} → tier={tier}"
+    return (tier, rationale)
 def main() -> int:
     parser = argparse.ArgumentParser()
-    parser.add_argument("--story-id", required=True)
+    parser.add_argument("--story-id")
     parser.add_argument("--backlog", default=".roll/backlog.md")
-    parser.add_argument("--routes", default=".roll/agent-routes.yaml")
-    parser.add_argument("--runs", default=None,
-                        help="runs.jsonl path for history soft preference (US-AGENT-005)")
+    parser.add_argument("--est-min", default=None,
+                        help="classify a bare estimate without a backlog lookup")
+    # Accepted for backward-compatible invocation; routing no longer reads
+    # agent-routes.yaml or runs.jsonl (US-AGENT-022 retires the 3-dim matcher
+    # and history soft preference). Tier→agent resolution is US-AGENT-023.
+    parser.add_argument("--routes", default=None, help=argparse.SUPPRESS)
+    parser.add_argument("--runs", default=None, help=argparse.SUPPRESS)
+    # US-AGENT-030: in-tier soft nudge. When --nudge is given, the other --nudge-*
+    # args drive nudge_within_tier and the chosen agent + rationale are printed as
+    # "<agent>\t<rationale>" (tab-separated so the rationale can carry spaces).
+    parser.add_argument("--nudge", action="store_true",
+                        help="reorder in-tier candidates by historical hit-rate")
+    parser.add_argument("--slot-agent", default=None,
+                        help="the est_min tier slot agent (nudge hard-floor default)")
+    parser.add_argument("--story-type", default="",
+                        help="story type bucket for the hit-rate lookup (US/FIX/...)")
+    parser.add_argument("--candidates", default="",
+                        help="comma-separated in-tier candidate agent names")
+    parser.add_argument("--hit-rates", default=None,
+                        help="hit-rate read model JSON (from loop_result_eval --hit-rates); "
+                             "reads stdin if omitted")
+    parser.add_argument("--sample-floor", type=int, default=SAMPLE_FLOOR,
+                        help="min sample_n a combo needs to influence routing")
+    parser.add_argument("--disabled", action="store_true",
+                        help="run the identity path (exact US-AGENT-023 behaviour)")
     args = parser.parse_args()
-    runs = Path(args.runs) if args.runs else None
-    result = pick(args.story_id, Path(args.backlog), Path(args.routes), runs)
+    if args.nudge:
+        if not args.slot_agent:
+            print("loop_pick_agent: --slot-agent required with --nudge", file=sys.stderr)
+            return 1
+        import json
+        raw = args.hit_rates
+        if raw is None:
+            raw = sys.stdin.read()
+        try:
+            hit_rates = json.loads(raw) if raw and raw.strip() else {}
+        except (ValueError, TypeError) as exc:
+            print(f"loop_pick_agent: bad hit-rates JSON: {exc}", file=sys.stderr)
+            return 1
+        candidates = [c.strip() for c in args.candidates.split(",") if c.strip()]
+        chosen, rationale = nudge_within_tier(
+            args.slot_agent, candidates, args.story_type, hit_rates,
+            sample_floor=args.sample_floor, enabled=not args.disabled)
+        # Tab-separated: field 1 = chosen agent, field 2 = audit rationale.
+        print(f"{chosen}\t{rationale}")
+        return 0
+    if args.est_min is not None:
+        tier = _classify_complexity(args.est_min)
+        print(f"{tier} est_min={args.est_min} → tier={tier}")
+        return 0
+    if not args.story_id:
+        print("loop_pick_agent: --story-id or --est-min required", file=sys.stderr)
+        return 1
+    result = classify_story(args.story_id, Path(args.backlog))
     if result is None:
-        print(f"loop_pick_agent: cannot route {args.story_id}", file=sys.stderr)
+        print(f"loop_pick_agent: cannot classify {args.story_id}", file=sys.stderr)
         return 1
-    agent, rule_kind, rationale = result
-    print(f"{agent} {rule_kind} {rationale}")
+    tier, rationale = result
+    print(f"{tier} {rationale}")
     return 0