npm - @event4u/agent-config - Versions diffs - 2.8.0 → 2.10.0 - Mend

@event4u/agent-config 2.8.0 → 2.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67)

package/.agent-src/personas/engineering-manager.md +133 -0
package/.agent-src/personas/finance-partner.md +129 -0
package/.agent-src/personas/people-strategist.md +126 -0
package/.agent-src/personas/strategist.md +129 -0
package/.agent-src/rules/no-roadmap-references.md +19 -0
package/.agent-src/skills/build-buy-partner/SKILL.md +145 -0
package/.agent-src/skills/comp-banding/SKILL.md +160 -0
package/.agent-src/skills/competitive-moat-analysis/SKILL.md +152 -0
package/.agent-src/skills/contracts-cognition/SKILL.md +147 -0
package/.agent-src/skills/data-handling-judgment/SKILL.md +155 -0
package/.agent-src/skills/forecasting/SKILL.md +164 -0
package/.agent-src/skills/hiring-loop-design/SKILL.md +167 -0
package/.agent-src/skills/market-entry-analysis/SKILL.md +144 -0
package/.agent-src/skills/onboarding-program/SKILL.md +157 -0
package/.agent-src/skills/one-on-one-cadence/SKILL.md +161 -0
package/.agent-src/skills/org-design/SKILL.md +158 -0
package/.agent-src/skills/perf-feedback-craft/SKILL.md +157 -0
package/.agent-src/skills/privacy-review/SKILL.md +160 -0
package/.agent-src/skills/runway-cognition/SKILL.md +136 -0
package/.agent-src/skills/scenario-modeling/SKILL.md +139 -0
package/.agent-src/skills/throughput-vs-morale-tradeoff/SKILL.md +165 -0
package/.agent-src/skills/unit-economics-modeling/SKILL.md +54 -7
package/.agent-src/skills/vision-articulation/SKILL.md +146 -0
package/.agent-src/templates/agents/agent-project-settings.example.yml +1 -1
package/.agent-src/templates/scripts/telemetry/settings.py +65 -0
package/.agent-src/templates/scripts/tier_usage_report.py +183 -0
package/.agent-src/templates/scripts/work_engine/hooks/builtin/memory_visibility.py +32 -3
package/.agent-src/templates/scripts/work_engine/scoring/memory_visibility.py +147 -1
package/.claude-plugin/marketplace.json +18 -1
package/AGENTS.md +1 -1
package/CHANGELOG.md +134 -0
package/README.md +34 -14
package/config/agent-settings.template.yml +28 -0
package/docs/architecture.md +37 -11
package/docs/catalog.md +22 -4
package/docs/contracts/adr-forecast-construction-shape.md +89 -0
package/docs/contracts/adr-wing4-context-spine.md +125 -0
package/docs/contracts/command-clusters.md +41 -0
package/docs/contracts/command-surface-tiers.md +25 -9
package/docs/contracts/context-spine.md +8 -0
package/docs/contracts/decision-trace-v1.md +30 -0
package/docs/contracts/hook-architecture-v1.md +46 -0
package/docs/contracts/mcp-beta-criteria.md +129 -0
package/docs/contracts/memory-visibility-v1.md +33 -0
package/docs/contracts/settings-sync-yaml-subset.md +138 -0
package/docs/guidelines/wing4-handoff.md +127 -0
package/docs/mcp-server.md +1 -1
package/docs/readme-split-plan.md +102 -0
package/package.json +1 -1
package/scripts/_cli/cmd_doctor.py +527 -14
package/scripts/_cli/cmd_settings_check.py +171 -0
package/scripts/_cli/cmd_validate.py +10 -0
package/scripts/agent-config +59 -18
package/scripts/chat_history.py +19 -0
package/scripts/check_council_references.py +46 -5
package/scripts/hooks/dispatch_hook.py +5 -1
package/scripts/hooks/replay_hook.py +144 -0
package/scripts/hooks/state_io.py +24 -1
package/scripts/hooks_doctor.py +184 -0
package/scripts/install.py +5 -0
package/scripts/lint_context_spine_usage.py +1 -0
package/scripts/lint_hook_concern_budget.py +203 -0
package/scripts/mcp_server/__init__.py +1 -0
package/scripts/mcp_server/server.py +4 -3
package/scripts/roadmap_progress_hook.py +11 -0
package/scripts/schemas/skill.schema.json +2 -2
package/scripts/skill_linter.py +107 -3

package/.agent-src/skills/unit-economics-modeling/SKILL.md CHANGED Viewed

@@ -1,10 +1,11 @@
 ---
 name: unit-economics-modeling
-description: "Use when modeling CAC, LTV, gross-margin payback, or contribution margin per customer — for SaaS, marketplace, or transactional businesses."
+description: "Use when modeling CAC, LTV, payback, contribution margin, or burn-multiple per customer — SaaS, marketplace, or transactional. Triggers on 'are we unit-economic', 'what is our LTV/CAC'."
 status: active
 tier: senior
 source: package
 domain: product
+context_spine: [product, fiscal-period]
 ---
 # unit-economics-modeling
@@ -14,9 +15,30 @@ domain: product
 - A board ask: "is this business unit-economic?" — needs CAC / LTV / payback, not vibes.
 - A new channel is scaling and the question is whether the CAC payback period is sustainable.
 - A pricing or packaging change needs to be tested against contribution margin per cohort.
+- A finance-partner needs to construct burn-multiple cognition before the next forecast or scenario pass.
 Do NOT use for full-business intrinsic-value modeling, OKR setting, funnel-stage diagnosis, or backlog ranking (see Related Skills).
+## Cognition cluster
+- **Mental model 1 — First principles.** Strip the unit to one paying
+  customer and one fully-loaded acquisition dollar. Aggregate ratios
+  ride on per-unit truth; if the unit is mis-defined (trial vs paid,
+  household vs seat), every ratio downstream is decoration. See
+  [`docs/contracts/mental-models.md`](../../../docs/contracts/mental-models.md) § 1.
+- **Mental model 8 — Second-order thinking.** A CAC drop driven by
+  discounting lifts LTV/CAC on paper while shortening cohort
+  retention — the second-order effect lands two quarters later in
+  churn. Score the second-order cost of every lever, not just the
+  first-order ratio. See `mental-models.md` § 8.
+- **Context-spine — product + fiscal-period.** Read the **product**
+  slot for what a "customer" actually is in this scope (seat vs
+  household vs paid trial vs activated free), and the
+  **fiscal-period** slot for the close-window the ratios must
+  reconcile against (monthly close vs quarterly board pack vs
+  annual plan). See
+  [`context-spine`](../../../docs/contracts/context-spine.md).
 ## Procedure
 ### Step 0: Inspect
@@ -51,13 +73,30 @@ Do NOT use for full-business intrinsic-value modeling, OKR setting, funnel-stage
 2. **LTV / CAC ratio**: target ≥ 3.0. Below 1.5 is acquisition-loss territory; above 5.0 means under-investment in growth (or bad LTV math).
 3. Both numbers, not one. Payback drives capital efficiency; ratio drives long-run economics.
-### Step 5: Cohort the answer
-1. Run Steps 1–4 by signup-quarter cohort. Trends matter more than the point estimate.
+### Step 5: Compute burn-multiple judgment
+1. **Burn multiple** = `net burn / net new ARR` over the fiscal-period
+   slot's reporting window (monthly close / quarterly / annual).
+   It answers *"how many dollars of cash do we burn to add one
+   dollar of recurring revenue?"* — a single ratio that compresses
+   CAC, gross margin, and churn into capital efficiency.
+2. Compute on **net** new ARR (gross new − churn − contraction).
+   Burn-multiple on gross new ARR flatters the picture by exactly
+   the churn rate; auditors and acquirers will recompute.
+3. Read the ratio against the org-stage colour from the
+   **fiscal-period** + product spine — do not hardcode a band here.
+   The cognition is *"smaller is better, and the direction across
+   cohorts matters more than the point estimate."* Bands belong in
+   `runway-cognition` (O3) where stage context is the load-bearing
+   input.
+### Step 6: Cohort the answer
+1. Run Steps 1–5 by signup-quarter cohort. Trends matter more than the point estimate.
 2. If LTV/CAC is improving but payback is lengthening, you are buying retention with discounting — flag.
 3. If both deteriorate, the channel mix has shifted to a worse channel — segment by channel to find the leak.
-### Step 6: Validate
+### Step 7: Validate
 1. Sanity-check LTV against revenue retention. If implied LTV > 8× annual revenue per customer with monthly churn > 2%, the math is wrong.
 2. Sanity-check CAC against fully-loaded P&L. If channel CACs sum to less than total acquisition spend, allocations are missing.
@@ -88,6 +127,13 @@ Do NOT use for full-business intrinsic-value modeling, OKR setting, funnel-stage
 - Diagnosing where conversion drops — route to [`funnel-analysis`](../funnel-analysis/SKILL.md).
 - Ranking competing initiatives — route to [`rice-prioritization`](../rice-prioritization/SKILL.md).
 - Setting team objectives that move these metrics — route to [`okr-tree-modeling`](../okr-tree-modeling/SKILL.md).
+- Cash-runway shape, fundraise-trigger heuristics, or layoff-vs-cut-vs-grow framing — route to [`runway-cognition`](../runway-cognition/SKILL.md) (O3).
+- Multi-statement scenario construction over base / upside / downside — route to [`scenario-modeling`](../scenario-modeling/SKILL.md) (O4).
+- Forecast-call construction (commit / best-case / pipeline) — route to [`forecasting`](../forecasting/SKILL.md) (O2).
+Wing-4 handoff: this skill ships the `unit-economics-frame.md`
+artifact that `scenario-modeling` (O4) reads as its money input
+(`docs/guidelines/wing4-handoff.md` § Chain 1).
 ## When the agent should load this
@@ -99,7 +145,8 @@ Do NOT use for full-business intrinsic-value modeling, OKR setting, funnel-stage
 ## Output
-1. **`unit-econ-table.md`** — table per channel and blended: CAC · ARPA · gross margin · payback months · LTV · LTV/CAC. With cohort columns (last 4 quarters).
+1. **`unit-econ-table.md`** — table per channel and blended: CAC · ARPA · gross margin · payback months · LTV · LTV/CAC · burn-multiple. With cohort columns (last 4 quarters).
 2. **`assumptions.md`** — formula chosen (SaaS / marketplace / transactional), churn definition, COGS allocation method, lifetime cap. One bullet per choice.
-3. **`cohort-trend.md`** — trend chart (ASCII or markdown table) of CAC, payback, LTV/CAC over the last 4–8 cohorts. Annotate channel-mix shifts.
+3. **`cohort-trend.md`** — trend chart (ASCII or markdown table) of CAC, payback, LTV/CAC, burn-multiple over the last 4–8 cohorts. Annotate channel-mix shifts.
 4. **`sanity-checks.md`** — explicit cross-checks (LTV vs annual revenue, channel CAC sum vs P&L). Flag any that fail with a one-line investigation pointer.
+5. **`unit-economics-frame.md`** *(Wing-4 handoff)* — the typed artifact `scenario-modeling` (O4) reads: CAC / LTV ratio, contribution margin, payback band, burn-multiple verdict, segment scope, fiscal-period the frame reconciles against. Per `docs/guidelines/wing4-handoff.md` § Chain 1.

package/.agent-src/skills/vision-articulation/SKILL.md ADDED Viewed

@@ -0,0 +1,146 @@
+---
+name: vision-articulation
+description: "Use when articulating internal vision — where we're going / why now / why us, founder-mode anchor, distinct from fundraising pitch. Triggers on 'what's our vision', 'why are we doing this'."
+status: active
+tier: senior
+source: package
+domain: process
+context_spine: [org-stage, product, customer-segment]
+---
+# vision-articulation
+## When to use
+- An internal anchor is needed — board off-site, all-hands, strategy doc — and the question is *where are we going, why now, why us*. The audience is the team, not investors.
+- A founder or strategist is sense-checking whether the current direction still holds, or whether reality has diverged from the stated vision.
+- A new hire / new exec needs the founder-mode read of the company — not the fundraise pitch, the durable internal frame.
+Do NOT use for outward-facing fundraise pitch (route to Wing-3 `fundraising-narrative` (H7); vision is internal-anchor, fundraising is external-pitch — different audiences, different proof bars), positioning copy / launch narrative (route to Wing-3 `positioning-strategy` / `messaging-architecture`), or roadmap construction (route to `feature-roadmap` workflows; vision constrains roadmaps, doesn't replace them).
+## Cognition cluster
+- **Mental model 27 — Why now.** Vision without a *why now* is a wishlist. The market shift, technology wave, regulatory change, or demographic inflection that makes the vision *achievable in this window* is the load-bearing claim. See [`mental-models.md`](../../../docs/contracts/mental-models.md) § 27.
+- **Mental model 1 — First principles.** Strip the vision to outcomes for a named cohort, not feature lists or company milestones. *"Be the X for Y"* is shape; *"customer C does outcome O 10× faster"* is substance. See `mental-models.md` § 1.
+- **Mental model 21 — Second-order thinking.** *"If this vision is realised, what does the world look like in 5 years?"* If the answer is just *"we got bigger"*, the vision isn't load-bearing; if the answer is structural change (workflow shift, market re-shape), it is. See `mental-models.md` § 21.
+- **Context-spine — org-stage + product + customer-segment.** Read **org-stage** for vision-horizon (pre-seed = 3-year survival vision; growth = 5–7-year market-shape vision; mature = 10-year). Read **product** for the realistic shipping shape. Read **customer-segment** for the cohort whose outcome anchors the vision.
+## Procedure
+### Step 0: Identify stance — founder vs operator
+Vision-articulation is a founder-mode act, not an operator-mode act (per council Q6). Before starting:
+1. Confirm the requester is in founder-stance — they are setting the durable frame, not optimising within an existing frame.
+2. If the requester is in operator-stance (planning, executing, refining within a fixed frame), this skill is wrong; route to roadmap / planning skills.
+Founder-stance is the precondition; without it the output is mis-shaped.
+### Step 1: Frame "where we're going" — cohort + outcome + horizon
+One sentence: *"In [horizon], [customer-segment] [does outcome O] [because of structural change S]."*
+Anti-patterns to reject:
+1. *"We're the [Big Company] of [Vertical]"* — feature-comparison, not cohort outcome. Reject.
+2. *"We're building the future of X"* — generic, no cohort named. Reject.
+3. *"We're growing to $100M ARR"* — milestone, not vision. Reject.
+The sentence must name *who*, *what they do that they can't do today*, *why it matters in [horizon]*.
+### Step 2: Construct "why now"
+Name 2–3 inflections that make this vision achievable in this window — not next decade, not last decade:
+1. **Technology inflection** — capability newly cheap / newly possible (AI, edge compute, new platform).
+2. **Market inflection** — buyer behaviour shift, segment opening, incumbent vulnerability (M&A churn, regulatory burden, talent loss).
+3. **Demographic / regulatory inflection** — workforce shift, policy change, generational handover.
+Each inflection must be *recent* (last 24–36 months) or *imminent* (next 24 months). Inflections from 10 years ago are settled; inflections 10 years out are speculative. The *why now* window is narrow.
+### Step 3: Construct "why us"
+Name 2–3 reasons this team, not another, can execute the vision. Honesty test:
+1. **Unique capability** — composes `competitive-moat-analysis` (P3); cite the moat dimensions where we score strong with evidence.
+2. **Unique distribution** — channel, community, partnership we have and competitors structurally can't replicate.
+3. **Unique insight** — founder / team origin gives us a read on the cohort that competitors don't have.
+If the answer is *"we're hardworking"* or *"we move fast"*, the answer is none. Most teams are. Re-run.
+### Step 4: Inversion — what would falsify the vision?
+For each load-bearing claim (cohort outcome, why-now inflection, why-us reason), write:
+1. *"What evidence in the next 12 months would falsify this?"* — leading signal that the vision is wrong-shaped.
+2. *"What change in the world would make this vision obsolete?"* — exogenous shift we're betting against.
+A vision that can't be falsified is a slogan. Concrete falsifiers = real vision.
+### Step 5: Validate the vision before emitting
+Before producing the artifact, verify three things:
+1. **Cohort-outcome specificity** — confirm the Step-1 sentence names a specific cohort + specific outcome + specific horizon; generic phrasing fails and must be re-run.
+2. **Why-now timestamp** — assert each Step-2 inflection is dated within the recent (last 24–36 months) or imminent (next 24 months) window; un-timestamped inflections are claims, not inflections.
+3. **Why-us falsifiability** — check that each Step-3 reason has a named Step-4 falsifier; un-falsifiable reasons are slogans and must be demoted.
+All three must pass. If any fails, return to the failing step.
+### Step 6: Emit the vision frame
+Produce the vision-frame artifact for internal use (board, all-hands, exec onboarding). Hand off to Wing-3 `fundraising-narrative` (H7) if an external pitch derivative is needed — that's a translation, not a copy.
+## Related Skills
+**WHEN to use this**
+- Internal vision articulation for board off-site, all-hands, strategy doc.
+- Founder / exec sense-check on vision-vs-reality divergence.
+- New-hire / new-exec onboarding to founder-mode read.
+**WHEN NOT to use this**
+- Outward-facing fundraise pitch — route to Wing-3 [`fundraising-narrative`](../fundraising-narrative/SKILL.md) (H7); vision is internal anchor, fundraising is external pitch.
+- Positioning / messaging copy — route to Wing-3 [`positioning-strategy`](../positioning-strategy/SKILL.md) and [`messaging-architecture`](../messaging-architecture/SKILL.md).
+- Moat reading — route to [`competitive-moat-analysis`](../competitive-moat-analysis/SKILL.md) (P3); this skill composes P3 for the "why us" frame.
+- Roadmap / phase planning — route to roadmap workflows; vision constrains roadmaps, doesn't replace them.
+## When the agent should load this
+- "What's our vision?"
+- "Sense-check our direction — does this still hold?"
+- "Draft the vision section for the board off-site."
+- "Why now / why us frame for the strategy doc."
+- "Wo wollen wir hin und warum jetzt?"
+## Output
+1. **`vision-frame.md`** — one-sentence vision (cohort + outcome + horizon), three "why now" inflections, three "why us" reasons.
+2. **`falsifiers.md`** — leading signals + exogenous shifts that would falsify each load-bearing claim.
+3. **`vision-vs-reality.md`** *(optional)* — when used as a sense-check, the delta between stated vision and current trajectory; named divergence points.
+## Gotcha
+- "Be the X for Y" frames are positioning slogans, not vision. Reject; force cohort-outcome-horizon.
+- "Why now" with no timestamp is a claim, not an inflection. Force a date.
+- "Why us" answers like *"we're a great team"* mean none. Force unique capability / distribution / insight cited with evidence.
+- Vision artifacts that no change in the world over the next 5 years could falsify are slogans. Real visions have falsifiers.
+## Do NOT
+- Do NOT collapse vision into fundraise pitch — different audiences, different proof bars.
+- Do NOT skip the inversion / falsifier step — un-stressed visions are unfalsifiable.
+- Do NOT bolt vision onto roadmap milestones — milestones are downstream of vision, not the same thing.
+## Runnable example
+Series-A vertical SaaS, founder requests vision sense-check for board off-site.
+- Step 0 — Stance: founder-mode confirmed (off-site context, setting durable frame).
+- Step 1 — Vision: *"In 5 years, mid-size healthcare specialty groups (50–200 providers) deliver patient scheduling with 80 % less administrative load because the workflow is regulation-aware and self-adapting per state."*
+- Step 2 — Why now: (a) state-licensure data became machine-readable in the last 24 months (tech inflection); (b) incumbent hospital-focused vendors hit their growth ceiling and are starting to mis-fit specialty groups (market inflection); (c) post-COVID, specialty-group practice owners control admin spend directly (buyer-behaviour shift, last 36 months).
+- Step 3 — Why us: (a) founder's clinical-ops background + customer-segment access (unique insight, cited with 12 customer interviews); (b) switching-cost moat from 24-month deployment depth (composes P3, cited evidence); (c) HIPAA + 50-state licensure capability stack (unique capability).
+- Step 4 — Falsifiers: (a) incumbent ships specialty-group focused migration tooling = unique-capability claim erodes; (b) state-licensure data becomes opaque again (regulatory rollback) = tech inflection inverts; (c) specialty-group consolidation accelerates so 50–200 cohort shrinks = cohort scope shrinks.
+- Step 5 — Validate: cohort + outcome + horizon present; why-now inflections all dated within 24–36 months; why-us reasons each have falsifier. Pass.
+- Step 6 — Emit vision-frame for board off-site; flag potential H7 derivative needed for the upcoming Series-B narrative.

package/.agent-src/templates/agents/agent-project-settings.example.yml CHANGED Viewed

@@ -39,7 +39,7 @@ schema_version: 1
 # CI guard: a release bump of `package.json` must update this value
 # in lockstep — see scripts/check_template_pin_drift.py (road-to-
 # portable-runtime-and-update-check P3.3).
-agent_config_version: "2.7.0"
+agent_config_version: "2.8.0"
 # --- Project identity ---
 project:

package/.agent-src/templates/scripts/telemetry/settings.py CHANGED Viewed

@@ -18,6 +18,12 @@ DEFAULT_LOG_PATH = Path(".agent-engagement.jsonl")
 DEFAULT_GRANULARITY = "task"
 ALLOWED_GRANULARITIES = ("task", "phase-step", "tool-call")
+#: Defaults for the tier-usage signal (Phase 5 of road-to-surface-discipline).
+#: Separate file, separate opt-in, same default-off posture. Contract:
+#: ``docs/contracts/command-clusters.md#tier-usage-signal-contract``.
+DEFAULT_TIER_USAGE_LOG_PATH = Path(".agent-tier-usage.jsonl")
+DEFAULT_TIER_USAGE_RETIER = {"window_days": 30, "min_invocations": 20, "min_distinct_users": 3}
 @dataclass(frozen=True)
 class TelemetrySettings:
@@ -104,9 +110,68 @@ def read_settings(path: Path) -> TelemetrySettings:
     return settings
+@dataclass(frozen=True)
+class TierUsageSettings:
+    """Immutable, parsed view of the ``telemetry.tier_usage`` settings section.
+    Built by :func:`read_tier_usage_settings`, which substitutes a default
+    for every value that is missing or fails coercion.
+    """
+    # Opt-in switch; the reader falls back to ``False`` when unset.
+    enabled: bool
+    # Taken from ``output.path``; falls back to DEFAULT_TIER_USAGE_LOG_PATH.
+    log_path: Path
+    # The three values below come from the ``retier`` sub-section and fall
+    # back to DEFAULT_TIER_USAGE_RETIER. NOTE(review): presumably thresholds
+    # for re-tiering a command — confirm against the command-clusters contract.
+    window_days: int
+    min_invocations: int
+    min_distinct_users: int
+def read_tier_usage_settings(path: Path) -> TierUsageSettings:
+    """Parse the ``telemetry.tier_usage`` section of the settings file at *path*.
+    Tolerant by design: a missing file, an unavailable PyYAML, a file that
+    fails to load, or a section of the wrong shape all yield the defaults
+    (``enabled=False``) instead of an exception.
+    """
+    def _load_section() -> dict[str, Any]:
+        # Walk telemetry -> tier_usage; any non-mapping on the way means "unset".
+        if not path.is_file():
+            return {}
+        try:
+            import yaml  # type: ignore[import-not-found]
+        except ImportError:
+            return {}
+        try:
+            document = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
+        except Exception:
+            return {}
+        node: Any = document
+        for key in ("telemetry", "tier_usage"):
+            if not isinstance(node, dict):
+                return {}
+            node = node.get(key)
+        return node if isinstance(node, dict) else {}
+    def _coerce_int(value: Any, default: int) -> int:
+        # ``bool`` is an ``int`` subclass, so it has to be excluded explicitly.
+        usable = (
+            isinstance(value, int)
+            and not isinstance(value, bool)
+            and value >= 0
+        )
+        return value if usable else default
+    section = _load_section()
+    output = section.get("output")
+    if not isinstance(output, dict):
+        output = {}
+    retier = section.get("retier")
+    if not isinstance(retier, dict):
+        retier = {}
+    fallback = DEFAULT_TIER_USAGE_RETIER
+    thresholds = {
+        name: _coerce_int(retier.get(name), fallback[name])
+        for name in ("window_days", "min_invocations", "min_distinct_users")
+    }
+    return TierUsageSettings(
+        enabled=_coerce_bool(section.get("enabled"), default=False),
+        log_path=_coerce_path(output.get("path"), DEFAULT_TIER_USAGE_LOG_PATH),
+        **thresholds,
+    )
 __all__ = [
     "DEFAULT_GRANULARITY",
     "DEFAULT_LOG_PATH",
+    "DEFAULT_TIER_USAGE_LOG_PATH",
+    "DEFAULT_TIER_USAGE_RETIER",
     "TelemetrySettings",
+    "TierUsageSettings",
     "read_settings",
+    "read_tier_usage_settings",
 ]

package/.agent-src/templates/scripts/tier_usage_report.py ADDED Viewed

@@ -0,0 +1,183 @@
+#!/usr/bin/env python3
+"""Tier-usage report — aggregate the local tier-usage log into a frequency table.
+Phase 5 Step 3 of road-to-surface-discipline. Reads the JSONL log
+written by the dispatcher (default ``.agent-tier-usage.jsonl``; override
+via ``telemetry.tier_usage.output.path``) and emits a per-command
+frequency table grouped by tier, plus distinct ``user_hash`` counts.
+Run-local-only; no upload, no remote aggregation.
+Privacy floor mirrors the contract in
+``docs/contracts/command-clusters.md#tier-usage-signal-contract`` and
+the four-layer enforcement model used by artefact-engagement telemetry.
+Records that carry any field outside the contract whitelist are dropped
+at the read gate — the report refuses to render leaked shapes rather
+than re-emit them.
+Usage:
+    python3 tier_usage_report.py                       # last 30d, table
+    python3 tier_usage_report.py --window-days 7       # last 7d
+    python3 tier_usage_report.py --window-days 0       # full log
+    python3 tier_usage_report.py --json                # JSON for tooling
+    python3 tier_usage_report.py --log-path X.jsonl    # archived snapshot
+Exit codes:
+    0   success or telemetry disabled (single header line)
+    1   no records survived the privacy floor on a non-empty file
+    2   IO error (permission denied; passed path missing)
+"""
+from __future__ import annotations
+import argparse
+import json
+import sys
+from collections import defaultdict
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+from typing import Any
+from telemetry.settings import DEFAULT_TIER_USAGE_LOG_PATH, read_tier_usage_settings
+#: Contract whitelist (see ``docs/contracts/command-clusters.md``).
+ALLOWED_FIELDS = frozenset({"ts_bucket", "command", "tier", "outcome", "user_hash"})
+ALLOWED_OUTCOMES = frozenset({"success", "error", "blocked"})
+def _parse_record(raw: str) -> dict[str, Any] | None:
+    """Decode one JSONL line; return ``None`` when it violates the privacy floor.
+    A record is accepted only when it is a JSON object whose keys are all in
+    ``ALLOWED_FIELDS`` and whose values satisfy every check below.
+    Args:
+        raw: One non-empty line of the tier-usage log.
+    Returns:
+        The decoded dict, unchanged, or ``None`` if any check fails.
+    """
+    try:
+        obj = json.loads(raw)
+    except json.JSONDecodeError:
+        return None
+    if not isinstance(obj, dict):
+        return None
+    # A single key outside the whitelist disqualifies the whole record.
+    if not ALLOWED_FIELDS.issuperset(obj):
+        return None
+    cmd = obj.get("command")
+    # Path separators are refused so a file path can never pass as a command.
+    if not isinstance(cmd, str) or not cmd or "/" in cmd or "\\" in cmd:
+        return None
+    tier = obj.get("tier")
+    # ``bool`` subclasses ``int``: without the explicit exclusion a JSON
+    # ``true``/``false`` would be accepted and later counted as tier 1/0.
+    if isinstance(tier, bool) or not isinstance(tier, int) or tier not in (0, 1, 2, 3):
+        return None
+    if obj.get("outcome") not in ALLOWED_OUTCOMES:
+        return None
+    uh = obj.get("user_hash")
+    if not isinstance(uh, str) or len(uh) != 16:
+        return None
+    if not isinstance(obj.get("ts_bucket"), str):
+        return None
+    return obj
+def _within_window(ts_bucket: str, window_days: int | None) -> bool:
+    """Tell whether *ts_bucket* lies inside the trailing *window_days* window.
+    ``None`` or ``0`` disables the window, so every record matches. A
+    timestamp that is not ISO-8601 never matches; one without an offset is
+    read as UTC.
+    """
+    if window_days in (None, 0):
+        return True
+    normalized = ts_bucket.replace("Z", "+00:00")
+    try:
+        stamp = datetime.fromisoformat(normalized)
+    except ValueError:
+        return False
+    if stamp.tzinfo is None:
+        stamp = stamp.replace(tzinfo=timezone.utc)
+    cutoff = datetime.now(timezone.utc) - timedelta(days=window_days)
+    return stamp >= cutoff
+def aggregate(
+    log_path: Path, window_days: int,
+) -> tuple[dict[tuple[int, str], dict[str, Any]], int, int]:
+    """Fold the JSONL log into per-``(tier, command)`` statistics.
+    Returns ``(table, total_lines, kept)``. ``total_lines`` counts every
+    non-blank line; ``kept`` counts the lines that both parse cleanly and
+    fall inside the window. A log path that does not exist yields
+    ``({}, 0, 0)``.
+    """
+    if not log_path.exists():
+        return {}, 0, 0
+    stats: dict[tuple[int, str], dict[str, Any]] = defaultdict(
+        lambda: {"count": 0, "users": set()},
+    )
+    total = kept = 0
+    with log_path.open("r", encoding="utf-8") as handle:
+        for raw_line in handle:
+            text = raw_line.strip()
+            if not text:
+                continue
+            total += 1
+            record = _parse_record(text)
+            if record is None or not _within_window(record["ts_bucket"], window_days):
+                continue
+            kept += 1
+            entry = stats[(int(record["tier"]), record["command"])]
+            entry["count"] += 1
+            entry["users"].add(record["user_hash"])
+    # Collapse each user set to its size so no hash leaves this function.
+    summary: dict[tuple[int, str], dict[str, Any]] = {}
+    for key, entry in stats.items():
+        summary[key] = {
+            "count": entry["count"],
+            "distinct_users": len(entry["users"]),
+        }
+    return summary, total, kept
+def render(
+    table: dict[tuple[int, str], dict[str, Any]],
+    window_days: int,
+) -> str:
+    """Format the aggregated *table* as a fixed-width text report.
+    Rows are ordered by tier, then by descending call count, then by
+    command name. An empty table produces a one-line placeholder.
+    """
+    if window_days:
+        suffix = f" (last {window_days}d)"
+    else:
+        suffix = " (full log)"
+    if not table:
+        return f"(no tier-usage records{suffix})\n"
+    def _order(item: tuple[tuple[int, str], dict[str, Any]]) -> tuple[int, int, str]:
+        (tier, command), stats = item
+        return (tier, -stats["count"], command)
+    header = f"{'Tier':<6}{'Command':<32}{'Calls':>8}{'Users':>8}"
+    body = [
+        f"{tier:<6}{command:<32}{stats['count']:>8}{stats['distinct_users']:>8}"
+        for (tier, command), stats in sorted(table.items(), key=_order)
+    ]
+    footer = f"\n(window:{suffix.strip()})"
+    return "\n".join([header, "-" * len(header), *body, footer]) + "\n"
+def main(argv: list[str] | None = None) -> int:
+    """Run the tier-usage report CLI and return the process exit code.
+    Args:
+        argv: Argument vector without the program name; ``None`` lets
+            ``argparse`` fall back to ``sys.argv``.
+    Returns:
+        ``0`` on success or when telemetry is disabled, ``1`` when the log
+        holds records but none passes the privacy floor, ``2`` on an IO
+        error or when an explicitly passed ``--log-path`` does not exist.
+    Raises:
+        SystemExit: raised by ``argparse`` on invalid arguments, including a
+            negative ``--window-days``.
+    """
+    parser = argparse.ArgumentParser(description="Tier-usage frequency report.")
+    parser.add_argument("--window-days", type=int, default=30,
+                        help="trailing window in days (0 = full log)")
+    parser.add_argument("--json", action="store_true",
+                        help="emit JSON instead of the table")
+    parser.add_argument("--log-path", type=Path, default=None,
+                        help="override settings; read an archived log")
+    parser.add_argument("--settings-file", type=Path, default=Path(".agent-settings.yml"))
+    args = parser.parse_args(argv)
+    # A negative window puts the cutoff in the future and would silently
+    # filter out every record; refuse it up front.
+    if args.window_days < 0:
+        parser.error("--window-days must be >= 0 (0 = full log)")
+    settings = read_tier_usage_settings(args.settings_file)
+    log_path = args.log_path or settings.log_path or DEFAULT_TIER_USAGE_LOG_PATH
+    if args.log_path is None and not settings.enabled:
+        sys.stdout.write(
+            "(tier-usage telemetry disabled; set "
+            "`telemetry.tier_usage.enabled: true` in .agent-settings.yml)\n",
+        )
+        return 0
+    # An explicitly requested snapshot that is missing is an IO error, not an
+    # empty report. The default log may legitimately not exist yet.
+    if args.log_path is not None and not log_path.exists():
+        print(f"❌  log file not found: {log_path}", file=sys.stderr)
+        return 2
+    try:
+        table, total, kept = aggregate(log_path, args.window_days)
+        floor_survivors = kept
+        # ``kept`` also excludes records outside the window. Before calling
+        # it a privacy-floor failure, re-count with the window disabled so a
+        # log of valid but old records renders as an empty report.
+        if total > 0 and kept == 0 and args.window_days:
+            _, _, floor_survivors = aggregate(log_path, 0)
+    except OSError as exc:
+        print(f"❌  {exc}", file=sys.stderr)
+        return 2
+    if total > 0 and floor_survivors == 0:
+        print(f"❌  {total} record(s) read; 0 survived the privacy floor — "
+              "report refused", file=sys.stderr)
+        return 1
+    if args.json:
+        payload = {
+            "window_days": args.window_days,
+            "log_path": str(log_path),
+            "records_total": total,
+            "records_kept": kept,
+            "rows": [
+                {"tier": t, "command": c, "count": v["count"],
+                 "distinct_users": v["distinct_users"]}
+                for (t, c), v in sorted(table.items(), key=lambda kv: (kv[0][0], kv[0][1]))
+            ],
+        }
+        sys.stdout.write(json.dumps(payload, indent=2) + "\n")
+    else:
+        sys.stdout.write(render(table, args.window_days))
+    return 0
+if __name__ == "__main__":
+    sys.exit(main())

package/.agent-src/templates/scripts/work_engine/hooks/builtin/memory_visibility.py CHANGED Viewed

@@ -23,8 +23,11 @@ from __future__ import annotations
 from typing import Any, Iterable
+from ...scoring.decision_trace import summarise_memory, summarise_verify
 from ...scoring.memory_visibility import (
     DEFAULT_ASKED_TYPES,
+    compute_affected,
+    format_changed_decisions_block,
     format_line,
     should_emit,
     summarise_visibility,
@@ -82,20 +85,46 @@ class MemoryVisibilityHook:
             visibility_off=self._visibility_off,
         ):
             return
-        line = format_line(summary)
+        affected = self._derive_affected(work, memory)
+        line = format_line(summary, affected=affected)
         if not line:
             return
+        block = format_changed_decisions_block(
+            summary.get("ids") or [], affected,
+        )
         existing = getattr(work, "report", "") or ""
-        if line in existing:
+        rendered = line if block is None else f"{line}\n\n{block}"
+        if line in existing and (block is None or block in existing):
             return
         sep = "\n\n" if existing else ""
         try:
-            work.report = f"{existing}{sep}{line}"
+            work.report = f"{existing}{sep}{rendered}"
         except AttributeError as exc:
             raise HookError(
                 "memory-visibility: state.report not writable",
             ) from exc
+    def _derive_affected(self, work: Any, memory: Any) -> list[str] | None:
+        """Build the ``affected`` key list for the current work step.
+        Passes the decision-trace summaries of *memory* and ``work.verify``,
+        an ambiguity flag derived from ``work.questions``, and three optional
+        ``work`` attributes to ``compute_affected`` and returns its result
+        unchanged. NOTE(review): the ``None`` result for zero memory hits is
+        presumably decided inside ``compute_affected`` — confirm there.
+        """
+        mem = summarise_memory(memory)
+        ver = summarise_verify(getattr(work, "verify", None))
+        has_open_questions = bool(getattr(work, "questions", None))
+        inputs: dict[str, Any] = {
+            "memory_hits": mem["hits"],
+            "verify_claims": ver["claims"],
+            "verify_first_try_passes": ver["first_try_passes"],
+            "ambiguity_flag": has_open_questions,
+        }
+        # Attributes that may be absent on the work state default to ``None``.
+        for name in ("changes", "applied_rules", "test_plan"):
+            inputs[name] = getattr(work, name, None)
+        return compute_affected(**inputs)
 def derive_visibility(memory: Any) -> str | None:
     """Convenience helper: render the line directly from a memory list.