npm - @event4u/agent-config - Versions diffs - 1.12.0 → 1.14.0 - Mend

@event4u/agent-config 1.12.0 → 1.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (260) hide show

package/.agent-src/templates/scripts/work_engine/directives/ui/review.py ADDED Viewed

@@ -0,0 +1,468 @@
+"""``review`` step — stack-dispatched design-review pass.
+Phase 3 Step 4 of ``agents/roadmaps/road-to-product-ui-track.md``: the
+review step compares the rendered components from ``apply`` against
+the locked design brief and produces a structured findings list. It
+does **not** apply fixes — that is the polish step's job. Review's
+single output is ``state.ui_review`` carrying ``findings`` (a list of
+zero or more issue records) and ``review_clean`` (a bool that mirrors
+``len(findings) == 0`` once the agent finalises the pass).
+Routes on ``state.ui_review`` shape:
+- **Empty / None / non-dict** — first pass. Emit
+  ``@agent-directive: ui-design-review-<stack>`` delegating to the
+  stack-specific review skill; on the rebound the envelope lands in
+  ``state.ui_review``.
+- **Populated, missing ``findings``** — partial envelope, the skill
+  has to finish the pass. Halt with the same directive so the agent
+  re-runs the review.
+- **Populated, ``findings`` present, ``review_clean`` missing or not
+  a bool** — halt asking the agent to set the flag explicitly. Polish
+  reads it and would short-circuit the loop on a wrong value.
+- **Populated, well-formed** — return ``SUCCESS``; the dispatcher
+  advances to ``verify`` (polish), which decides whether to loop.
+Review does **not** enforce ``review_clean == (len(findings) == 0)``.
+That looks tempting but it blocks the legitimate "ship as-is with
+open findings" replay path: polish's ceiling halt asks the user to
+set ``review_clean = True`` while findings are still present, the
+dispatcher advances to report, and a later replay of the state file
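+would re-enter review with that envelope. Honesty of the flag is the
+producer's contract (the review skill on first pass; the polish skill
+on ship-as-is); review only checks the shape. The one exception is
+the a11y gate: when actionable violations remain after filtering, it
+appends ``a11y_violation`` findings and forces ``review_clean`` to
+``False``.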
+Idempotent: a re-entry with the same well-formed envelope round-trips
+through ``SUCCESS`` without re-emitting a halt.
+"""
+from __future__ import annotations
+from typing import Any
+from ...delivery_state import (
+    DeliveryState,
+    Outcome,
+    StepResult,
+    agent_directive,
+)
+# Keys are the values looked up from ``state.stack["frontend"]``; each
+# value is the skill name the halt helpers pass to ``agent_directive``.
+STACK_DIRECTIVES: dict[str, str] = {
+    "blade-livewire-flux": "ui-design-review-blade-livewire-flux",
+    "react-shadcn": "ui-design-review-react-shadcn",
+    "vue": "ui-design-review-vue",
+    "plain": "ui-design-review-plain",
+}
+"""Map ``state.stack.frontend`` → agent-directive skill name.
+Mirrors :data:`work_engine.directives.ui.apply.STACK_DIRECTIVES` so
+review fires the matching review skill for the stack apply targeted.
+An unknown stack falls through to ``ui-design-review-plain``.
+"""
+# Same string as ``STACK_DIRECTIVES["plain"]``.
+DEFAULT_DIRECTIVE = "ui-design-review-plain"
+"""Fallback directive when ``state.stack`` is missing or malformed."""
+# Higher rank means more severe; ``_at_or_above_floor`` compares ranks.
+SEVERITY_ORDER: dict[str, int] = {
+    "minor": 0,
+    "moderate": 1,
+    "serious": 2,
+    "critical": 3,
+}
+"""R4 a11y severity ranking — mirrors axe-core's impact levels."""
+# Must stay a key of ``SEVERITY_ORDER``: it is used to index that mapping
+# directly when a severity or floor is unknown.
+DEFAULT_SEVERITY_FLOOR = "moderate"
+"""R4 a11y default severity floor — violations strictly below this are
+informational; violations at or above are actionable."""
+# Every record carries the same three string keys: "code", "trigger" and
+# "resolution". Adjacent string literals below are implicitly
+# concatenated into one value per key.
+AMBIGUITIES: tuple[dict[str, str], ...] = (
+    # First pass: no usable envelope yet.
+    {
+        "code": "review_envelope_missing",
+        "trigger": "state.ui_review unset / empty — review skill has not run yet",
+        "resolution": "agent directive `ui-design-review-<stack>` → "
+        "skill compares rendered components against state.ui_design "
+        "and writes `findings` + `review_clean` back",
+    },
+    # Envelope present but without a `findings` list.
+    {
+        "code": "review_findings_missing",
+        "trigger": "state.ui_review populated but `findings` key absent",
+        "resolution": "agent re-runs the review skill with the same "
+        "directive; review only succeeds once findings is a list",
+    },
+    # `findings` present but `review_clean` is not a bool.
+    {
+        "code": "review_clean_missing",
+        "trigger": "state.ui_review.findings is set but review_clean "
+        "is missing or not a bool — polish needs an explicit flag",
+        "resolution": "agent sets state.ui_review.review_clean to "
+        "True or False before returning the envelope; review does "
+        "not infer it from findings count",
+    },
+    # Raised by the a11y gate when a baseline exists but no a11y envelope.
+    {
+        "code": "review_a11y_pending",
+        "trigger": "state.ui_audit declared an `a11y_baseline` but "
+        "state.ui_review.a11y is missing — the review skill ran but "
+        "did not produce an a11y envelope",
+        "resolution": "agent re-runs the review skill so it captures "
+        "axe-core (or equivalent) findings into "
+        "`state.ui_review.a11y.violations`; the gate then filters "
+        "against the baseline and the severity floor",
+    },
+    # Raised by the preview gate when `preview.render_ok is False`.
+    {
+        "code": "preview_render_failed",
+        "trigger": "state.ui_review.preview.render_ok is False — the "
+        "stack-specific review skill tried to render the changed "
+        "components and the headless browser reported an error",
+        "resolution": "user picks Retry (re-run the review skill so it "
+        "renders again), Skip (write `state.ui_review.preview.skipped = "
+        "true` so the gate stops asking this run), or Abort",
+    },
+)
+"""Declared ambiguity surfaces for this step."""
+def run(state: DeliveryState) -> StepResult:
+    """Apply the design-review gate to ``state.ui_review``."""
+    review = state.ui_review
+    if not _is_populated(review):
+        return _delegate_to_review_skill(state)
+    if "findings" not in review or not isinstance(review["findings"], list):
+        return _halt_findings_missing(state)
+    findings = review["findings"]
+    if not isinstance(review.get("review_clean"), bool):
+        return _halt_clean_missing(state, findings_count=len(findings))
+    a11y_halt = _apply_a11y_gate(state, review)
+    if a11y_halt is not None:
+        return a11y_halt
+    preview_halt = _apply_preview_gate(state, review)
+    if preview_halt is not None:
+        return preview_halt
+    return StepResult(outcome=Outcome.SUCCESS)
+def _is_populated(review: Any) -> bool:
+    """True when ``review`` is a dict with at least one own key.
+    Non-dict and empty-dict shapes are treated as "skill has not run"
+    so the first-pass directive fires.
+    """
+    return isinstance(review, dict) and bool(review)
+def _resolve_directive(state: DeliveryState) -> str:
+    """Pick the agent directive for the project's frontend stack."""
+    stack = getattr(state, "stack", None) or {}
+    if isinstance(stack, dict):
+        frontend = stack.get("frontend")
+        if isinstance(frontend, str) and frontend in STACK_DIRECTIVES:
+            return STACK_DIRECTIVES[frontend]
+    return DEFAULT_DIRECTIVE
+def _stack_label(state: DeliveryState) -> str:
+    """Return the frontend stack label, defaulting to ``plain``."""
+    stack = getattr(state, "stack", None) or {}
+    if isinstance(stack, dict):
+        frontend = stack.get("frontend")
+        if isinstance(frontend, str) and frontend:
+            return frontend
+    return "plain"
+def _delegate_to_review_skill(state: DeliveryState) -> StepResult:
+    """First-pass halt — emit the stack-specific review directive."""
+    directive = _resolve_directive(state)
+    stack_label = _stack_label(state)
+    return StepResult(
+        outcome=Outcome.BLOCKED,
+        questions=[
+            agent_directive(directive),
+            f"> Stack: `{stack_label}`. Reviewing rendered components "
+            "against the locked design brief.",
+            "> The review pass compares `state.ticket.ui_apply.rendered` "
+            "against `state.ui_design` (microcopy, states, a11y, layout) "
+            "and produces a structured `findings` list.",
+            "> 1. Continue \u2014 run the review and write "
+            "`{findings: [...], review_clean: bool}` into "
+            "`state.ui_review`",
+            "> 2. Abort \u2014 drop this UI request",
+        ],
+        message=(
+            f"UI review pending; delegating to `{directive}` for "
+            f"stack `{stack_label}`."
+        ),
+    )
+def _halt_findings_missing(state: DeliveryState) -> StepResult:
+    """BLOCKED halt — envelope present but ``findings`` slot is unset."""
+    directive = _resolve_directive(state)
+    return StepResult(
+        outcome=Outcome.BLOCKED,
+        questions=[
+            agent_directive(directive),
+            "> Review envelope is partial: `findings` list is missing.",
+            "> Re-run the review skill so `state.ui_review.findings` "
+            "is a list (empty when nothing is wrong).",
+        ],
+        message="UI review envelope incomplete; `findings` missing.",
+    )
+def _halt_clean_missing(
+    state: DeliveryState,
+    *,
+    findings_count: int,
+) -> StepResult:
+    """BLOCKED halt — ``review_clean`` is missing or not a bool."""
+    directive = _resolve_directive(state)
+    return StepResult(
+        outcome=Outcome.BLOCKED,
+        questions=[
+            agent_directive(directive),
+            "> Review envelope is incomplete: `review_clean` is missing "
+            "or not a boolean.",
+            f"> Findings count: {findings_count}. Set "
+            "`state.ui_review.review_clean` to `True` (no further "
+            "polish needed) or `False` (polish loop should run).",
+        ],
+        message=(
+            "UI review envelope incomplete; `review_clean` must be a bool."
+        ),
+    )
+def _apply_a11y_gate(
+    state: DeliveryState,
+    review: dict[str, Any],
+) -> StepResult | None:
+    """R4 Phase 1: enforce a11y gate after the basic shape gates pass.
+    The gate is **opt-in via the audit baseline**: if
+    ``state.ui_audit.a11y_baseline`` is present (a list, possibly empty)
+    the audit declared this UI surface to be a11y-tracked. The review
+    skill must then populate ``state.ui_review.a11y.violations``;
+    missing → ``review_a11y_pending`` halt. Pre-R4 envelopes (no
+    baseline) bypass the gate so existing fixtures keep working.
+    When the envelope is present the gate filters violations against
+    the baseline (pre-existing issues are ignored), against the
+    accepted list (user-acknowledged issues from a previous polish
+    halt), and against the severity floor (default ``moderate``). Any
+    *actionable* leftover violations are synthesised as
+    ``a11y_violation`` findings on ``review.findings`` and
+    ``review_clean`` is forced to ``False`` so polish picks them up.
+    Returns ``None`` to advance the dispatcher, or a ``BLOCKED``
+    ``StepResult`` for the pending halt.
+    Side effects on ``review`` are deduplicated by ``(kind, rule)`` so
+    a re-entry round-trips without growing the findings list.
+    """
+    audit = getattr(state, "ui_audit", None)
+    has_baseline = isinstance(audit, dict) and "a11y_baseline" in audit
+    a11y = review.get("a11y")
+    if a11y is None:
+        if has_baseline:
+            return _halt_a11y_pending(state)
+        return None
+    violations = a11y.get("violations") or []
+    baseline = audit["a11y_baseline"] if has_baseline else []
+    accepted = a11y.get("accepted_violations") or []
+    floor = a11y.get("severity_floor") or DEFAULT_SEVERITY_FLOOR
+    new_violations = _filter_known(violations, baseline)
+    new_violations = _filter_known(new_violations, accepted)
+    actionable = [v for v in new_violations if _at_or_above_floor(v, floor)]
+    if not actionable:
+        return None
+    _synthesize_a11y_findings(review["findings"], actionable)
+    review["review_clean"] = False
+    return None
+def _filter_known(
+    violations: list[Any],
+    known: list[Any],
+) -> list[Any]:
+    """Drop violations whose ``(rule, selector)`` matches ``known``.
+    Used for both the baseline filter (pre-existing violations stay
+    ignored) and the accepted filter (user-acknowledged violations
+    after a ``polish_a11y_blocking`` halt). Non-dict entries in either
+    list are skipped — schema only enforces list shape.
+    """
+    if not known:
+        return list(violations)
+    keys: set[tuple[Any, Any]] = set()
+    for entry in known:
+        if isinstance(entry, dict):
+            keys.add((entry.get("rule"), entry.get("selector")))
+    if not keys:
+        return list(violations)
+    return [
+        v for v in violations
+        if not (
+            isinstance(v, dict)
+            and (v.get("rule"), v.get("selector")) in keys
+        )
+    ]
+def _at_or_above_floor(violation: Any, floor: str) -> bool:
+    """``True`` when ``violation.severity`` is at or above ``floor``.
+    Unknown severities default to ``moderate`` rather than dropping
+    the violation — a malformed envelope must not silently weaken the
+    gate. The floor itself is schema-validated, so a bogus floor never
+    reaches this helper.
+    """
+    if not isinstance(violation, dict):
+        return False
+    severity = violation.get("severity")
+    sev_rank = SEVERITY_ORDER.get(
+        severity if isinstance(severity, str) else "",
+        SEVERITY_ORDER[DEFAULT_SEVERITY_FLOOR],
+    )
+    floor_rank = SEVERITY_ORDER.get(floor, SEVERITY_ORDER[DEFAULT_SEVERITY_FLOOR])
+    return sev_rank >= floor_rank
+def _synthesize_a11y_findings(
+    findings: list[Any],
+    actionable: list[Any],
+) -> None:
+    """Append ``a11y_violation`` findings, deduped by ``(rule, selector)``.
+    Polish reads these as ordinary findings; the ``kind`` discriminator
+    lets Phase 2's ``polish_a11y_blocking`` gate isolate the a11y
+    subset at the polish ceiling.
+    """
+    existing: set[tuple[Any, Any]] = {
+        (f.get("rule"), f.get("selector"))
+        for f in findings
+        if isinstance(f, dict) and f.get("kind") == "a11y_violation"
+    }
+    for v in actionable:
+        if not isinstance(v, dict):
+            continue
+        key = (v.get("rule"), v.get("selector"))
+        if key in existing:
+            continue
+        findings.append({
+            "kind": "a11y_violation",
+            "rule": v.get("rule"),
+            "selector": v.get("selector"),
+            "severity": v.get("severity"),
+        })
+        existing.add(key)
+def _halt_a11y_pending(state: DeliveryState) -> StepResult:
+    """BLOCKED halt — audit declared a baseline but review has no a11y."""
+    directive = _resolve_directive(state)
+    return StepResult(
+        outcome=Outcome.BLOCKED,
+        questions=[
+            agent_directive(directive),
+            "> Review envelope is incomplete: the audit declared an "
+            "`a11y_baseline` but `state.ui_review.a11y` is missing.",
+            "> Re-run the review skill so it captures axe-core (or "
+            "equivalent) findings into "
+            "`state.ui_review.a11y.violations`. The gate filters "
+            "against the baseline and the severity floor "
+            f"(default `{DEFAULT_SEVERITY_FLOOR}`).",
+        ],
+        message=(
+            "UI review envelope incomplete; `a11y` envelope missing "
+            "(audit declared a baseline)."
+        ),
+    )
+def _apply_preview_gate(
+    state: DeliveryState,
+    review: dict[str, Any],
+) -> StepResult | None:
+    """R4 Phase 3: validate the visual-preview envelope written by the skill.
+    Contract: the **engine never renders**. Stack-specific review skills
+    (Playwright + axe-core for ``react-shadcn``, equivalent for
+    ``blade-livewire-flux``) produce ``state.ui_review.preview`` with
+    ``render_ok`` plus optional ``screenshot_path``, ``dom_dump_path``,
+    and ``error``. The gate inspects shape only.
+    Branches:
+    - ``preview`` missing or not a dict → no-op (opt-in; pre-R4 envelopes
+      and stacks that do not produce previews flow through silently).
+    - ``preview.skipped`` truthy → no-op (idempotent re-entry after the
+      user picked the Skip option on a previous halt).
+    - ``preview.render_ok`` missing → no-op (envelope still in progress;
+      schema validates type when present, content gates wait for an
+      explicit signal).
+    - ``preview.render_ok is False`` → ``preview_render_failed`` halt
+      with Retry / Skip / Abort options.
+    - ``preview.render_ok is True`` → no-op; ``report.run`` will surface
+      ``screenshot_path`` / ``dom_dump_path`` as a delivery artifact.
+    Trivial path (``directive_set == "ui-trivial"``) never reaches this
+    handler — the dispatcher routes ``review`` to ``_skipped.run`` for
+    that set, so the preview envelope is bypassed by construction.
+    """
+    preview = review.get("preview")
+    if not isinstance(preview, dict):
+        return None
+    if preview.get("skipped"):
+        return None
+    if "render_ok" not in preview:
+        return None
+    if preview["render_ok"] is False:
+        return _halt_preview_failed(state, preview)
+    return None
+def _halt_preview_failed(
+    state: DeliveryState,
+    preview: dict[str, Any],
+) -> StepResult:
+    """BLOCKED halt — render reported failure; user picks the next step."""
+    directive = _resolve_directive(state)
+    error = preview.get("error")
+    error_line = (
+        f"> Render error: `{error}`."
+        if isinstance(error, str) and error
+        else "> Render error: `(none reported)`."
+    )
+    return StepResult(
+        outcome=Outcome.BLOCKED,
+        questions=[
+            agent_directive(directive),
+            "> Visual preview failed: "
+            "`state.ui_review.preview.render_ok` is `False`.",
+            error_line,
+            "> 1. Retry — re-run the review skill so it renders again "
+            "and writes a fresh `preview` envelope",
+            "> 2. Skip — set `state.ui_review.preview.skipped = true` "
+            "so this run ships without a screenshot artifact",
+            "> 3. Abort — drop this UI request",
+        ],
+        message="UI preview render failed; awaiting user decision.",
+    )
+# Public names of this module: the declared constants plus ``run``.
+# The underscore-prefixed gate and halt helpers are not exported.
+__all__ = [
+    "AMBIGUITIES",
+    "DEFAULT_DIRECTIVE",
+    "DEFAULT_SEVERITY_FLOOR",
+    "SEVERITY_ORDER",
+    "STACK_DIRECTIVES",
+    "run",
+]

package/.agent-src/templates/scripts/work_engine/directives/ui_trivial/__init__.py ADDED Viewed

@@ -0,0 +1,119 @@
+"""UI-trivial directive set — single-file ≤5-line micro-edit path.
+Phase 2 Step 6 of ``agents/roadmaps/road-to-product-ui-track.md``: the
+short-circuit path for changes that provably cannot need the audit /
+design / review / polish loop. The dispatcher routes here when Phase
+1's intent classifier landed ``ui-trivial`` (color tweak, copy change,
+single-class swap, one-prop adjustment).
+The eight-step shape mirrors :mod:`work_engine.directives.backend` /
+:mod:`work_engine.directives.ui` — eight slots, fixed order, no
+branching. The trivial path fills them as follows:
+- ``refine``    → :mod:`.refine`     — confirm intent gate.
+- ``memory``    → :mod:`._skipped`   — bypassed.
+- ``analyze``   → :mod:`._skipped`   — bypassed.
+- ``plan``      → :mod:`._skipped`   — bypassed.
+- ``implement`` → :mod:`.apply`      — hard preconditions; reclassify
+  to ``ui-improve`` (full audit gate) when violated.
+- ``test``      → :mod:`.test`       — smoke-test delegate.
+- ``verify``    → :mod:`._skipped`   — bypassed.
+- ``report``    → :mod:`.report`     — one-line delivery summary.
+The directory uses an underscore (``ui_trivial``) because Python
+packages cannot contain hyphens. The schema carries the external
+hyphenated name ``"ui-trivial"``; the dispatcher's loader is the
+single place that translates between them.
+"""
+from __future__ import annotations
+from collections.abc import Mapping
+from ...delivery_state import Step
+from . import _skipped, apply, refine, report, test
+# Hyphenated wire name; the Python package itself is ``ui_trivial``.
+DIRECTIVE_SET_NAME = "ui-trivial"
+"""External name carried in ``state.directive_set`` for this set.
+Note the hyphen \u2014 this is the schema/wire form, not the Python
+module name. The module name (``ui_trivial``) is an implementation
+detail of the loader.
+"""
+# Repository-relative path of the roadmap document, kept as a plain string.
+ROADMAP = "agents/roadmaps/road-to-product-ui-track.md"
+"""Roadmap that defines this directive bundle (Phase 2 Step 6)."""
+# Declared as a tuple, so the set of kinds cannot be mutated in place.
+SUPPORTED_KINDS: tuple[str, ...] = ("ticket", "prompt", "diff", "file")
+"""Input kinds this directive set knows how to handle.
+Phase 1's intent classifier reaches ``ui-trivial`` from any of the
+four input kinds; the trivial set keeps the same tuple so input
+routing stays unchanged once the intent label has landed.
+"""
+def _build_step_map() -> dict[str, Step]:
+    """Wire the eight-step dispatcher slots for the trivial set.
+    ``refine`` validates the intent gate; ``implement``, ``test``,
+    and ``report`` carry the trivial-path behavior; the four bypassed
+    slots share :mod:`._skipped` so the dispatcher's completeness
+    check is satisfied without inventing per-slot stubs. The mapping
+    is rebuilt per call (cheap; the dispatcher invokes
+    :func:`get_steps` once per run).
+    """
+    skipped = _skipped.run
+    return {
+        "refine": refine.run,
+        "memory": skipped,
+        "analyze": skipped,
+        "plan": skipped,
+        "implement": apply.run,
+        "test": test.run,
+        "verify": skipped,
+        "report": report.run,
+    }
+def get_steps() -> Mapping[str, Step]:
+    """Return the ``{step_name: handler}`` mapping the dispatcher walks.
+    Mirrors :func:`work_engine.directives.backend.get_steps`. ``refine``,
+    ``implement``, ``test``, and ``report`` carry trivial-path behavior;
+    the four bypassed slots delegate to :mod:`._skipped`.
+    """
+    return _build_step_map()
+def all_ambiguities() -> dict[str, tuple[dict[str, str], ...]]:
+    """Per-step ambiguity declarations.
+    Mirrors :func:`work_engine.directives.backend.all_ambiguities`.
+    The four bypassed slots re-export :data:`_skipped.AMBIGUITIES`
+    (an empty tuple) so doc generators see a uniform shape across all
+    eight steps.
+    """
+    skipped = _skipped.AMBIGUITIES
+    return {
+        "refine": refine.AMBIGUITIES,
+        "memory": skipped,
+        "analyze": skipped,
+        "plan": skipped,
+        "implement": apply.AMBIGUITIES,
+        "test": test.AMBIGUITIES,
+        "verify": skipped,
+        "report": report.AMBIGUITIES,
+    }
+# Exports the three constants, the two accessor functions and the four
+# step submodules imported above; ``_skipped`` is not exported.
+__all__ = [
+    "DIRECTIVE_SET_NAME",
+    "ROADMAP",
+    "SUPPORTED_KINDS",
+    "all_ambiguities",
+    "apply",
+    "get_steps",
+    "refine",
+    "report",
+    "test",
+]

package/.agent-src/templates/scripts/work_engine/directives/ui_trivial/_skipped.py ADDED Viewed

@@ -0,0 +1,37 @@
+"""Pass-through handler for slots the trivial path skips.
+Phase 2 Step 6 of ``agents/roadmaps/road-to-product-ui-track.md``: the
+``ui-trivial`` directive set short-circuits the audit / design / review
+/ polish loop. Per the roadmap (Phase 1 Step 3, Phase 2 Step 6) the
+trivial path "skips audit + design + review; runs apply + smoke-test
+only; emits short delivery report".
+The dispatcher's ``STEP_ORDER`` is fixed (eight slots, no branching),
+so the trivial set fills the unused slots — ``memory``, ``analyze``,
+``plan``, ``verify`` — with this no-op handler. It returns ``SUCCESS``
+without touching state, mutates nothing, and declares zero
+ambiguities. The audit gate is **not** weakened: trivial bypass is
+gated upstream by ``apply``'s hard preconditions, which reclassify
+to ``ui-improve`` (and the full audit gate) when violated.
+"""
+from __future__ import annotations
+from ...delivery_state import DeliveryState, Outcome, StepResult
+# Empty on purpose: typed as a tuple of records but holding none.
+AMBIGUITIES: tuple[dict[str, str], ...] = ()
+"""No ambiguities — the slot is unconditionally skipped on the trivial path."""
+def run(state: DeliveryState) -> StepResult:
+    """Return ``SUCCESS`` without touching ``state``.
+    Used as a shared handler for the slots that the trivial path
+    intentionally bypasses. Keeping the slot wired (rather than
+    raising ``NotImplementedError``) preserves the dispatcher's
+    completeness-check invariant: every slot in :data:`STEP_ORDER`
+    has a callable handler, every directive set has a uniform shape.
+    """
+    return StepResult(outcome=Outcome.SUCCESS)
+# Public names of this module: the empty declaration and the no-op handler.
+__all__ = ["AMBIGUITIES", "run"]