@event4u/agent-config 2.8.0 → 2.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/.agent-src/personas/engineering-manager.md +133 -0
  2. package/.agent-src/personas/finance-partner.md +129 -0
  3. package/.agent-src/personas/people-strategist.md +126 -0
  4. package/.agent-src/personas/strategist.md +129 -0
  5. package/.agent-src/rules/no-roadmap-references.md +19 -0
  6. package/.agent-src/skills/build-buy-partner/SKILL.md +145 -0
  7. package/.agent-src/skills/comp-banding/SKILL.md +160 -0
  8. package/.agent-src/skills/competitive-moat-analysis/SKILL.md +152 -0
  9. package/.agent-src/skills/contracts-cognition/SKILL.md +147 -0
  10. package/.agent-src/skills/data-handling-judgment/SKILL.md +155 -0
  11. package/.agent-src/skills/forecasting/SKILL.md +164 -0
  12. package/.agent-src/skills/hiring-loop-design/SKILL.md +167 -0
  13. package/.agent-src/skills/market-entry-analysis/SKILL.md +144 -0
  14. package/.agent-src/skills/onboarding-program/SKILL.md +157 -0
  15. package/.agent-src/skills/one-on-one-cadence/SKILL.md +161 -0
  16. package/.agent-src/skills/org-design/SKILL.md +158 -0
  17. package/.agent-src/skills/perf-feedback-craft/SKILL.md +157 -0
  18. package/.agent-src/skills/privacy-review/SKILL.md +160 -0
  19. package/.agent-src/skills/runway-cognition/SKILL.md +136 -0
  20. package/.agent-src/skills/scenario-modeling/SKILL.md +139 -0
  21. package/.agent-src/skills/throughput-vs-morale-tradeoff/SKILL.md +165 -0
  22. package/.agent-src/skills/unit-economics-modeling/SKILL.md +54 -7
  23. package/.agent-src/skills/vision-articulation/SKILL.md +146 -0
  24. package/.agent-src/templates/agents/agent-project-settings.example.yml +1 -1
  25. package/.agent-src/templates/scripts/telemetry/settings.py +65 -0
  26. package/.agent-src/templates/scripts/tier_usage_report.py +183 -0
  27. package/.agent-src/templates/scripts/work_engine/hooks/builtin/memory_visibility.py +32 -3
  28. package/.agent-src/templates/scripts/work_engine/scoring/memory_visibility.py +147 -1
  29. package/.claude-plugin/marketplace.json +18 -1
  30. package/AGENTS.md +1 -1
  31. package/CHANGELOG.md +134 -0
  32. package/README.md +34 -14
  33. package/config/agent-settings.template.yml +28 -0
  34. package/docs/architecture.md +37 -11
  35. package/docs/catalog.md +22 -4
  36. package/docs/contracts/adr-forecast-construction-shape.md +89 -0
  37. package/docs/contracts/adr-wing4-context-spine.md +125 -0
  38. package/docs/contracts/command-clusters.md +41 -0
  39. package/docs/contracts/command-surface-tiers.md +25 -9
  40. package/docs/contracts/context-spine.md +8 -0
  41. package/docs/contracts/decision-trace-v1.md +30 -0
  42. package/docs/contracts/hook-architecture-v1.md +46 -0
  43. package/docs/contracts/mcp-beta-criteria.md +129 -0
  44. package/docs/contracts/memory-visibility-v1.md +33 -0
  45. package/docs/contracts/settings-sync-yaml-subset.md +138 -0
  46. package/docs/guidelines/wing4-handoff.md +127 -0
  47. package/docs/mcp-server.md +1 -1
  48. package/docs/readme-split-plan.md +102 -0
  49. package/package.json +1 -1
  50. package/scripts/_cli/cmd_doctor.py +527 -14
  51. package/scripts/_cli/cmd_settings_check.py +171 -0
  52. package/scripts/_cli/cmd_validate.py +10 -0
  53. package/scripts/agent-config +59 -18
  54. package/scripts/chat_history.py +19 -0
  55. package/scripts/check_council_references.py +46 -5
  56. package/scripts/hooks/dispatch_hook.py +5 -1
  57. package/scripts/hooks/replay_hook.py +144 -0
  58. package/scripts/hooks/state_io.py +24 -1
  59. package/scripts/hooks_doctor.py +184 -0
  60. package/scripts/install.py +5 -0
  61. package/scripts/lint_context_spine_usage.py +1 -0
  62. package/scripts/lint_hook_concern_budget.py +203 -0
  63. package/scripts/mcp_server/__init__.py +1 -0
  64. package/scripts/mcp_server/server.py +4 -3
  65. package/scripts/roadmap_progress_hook.py +11 -0
  66. package/scripts/schemas/skill.schema.json +2 -2
  67. package/scripts/skill_linter.py +107 -3
@@ -1,10 +1,11 @@
1
1
  ---
2
2
  name: unit-economics-modeling
3
- description: "Use when modeling CAC, LTV, gross-margin payback, or contribution margin per customer — for SaaS, marketplace, or transactional businesses."
3
+ description: "Use when modeling CAC, LTV, payback, contribution margin, or burn-multiple per customer — SaaS, marketplace, or transactional. Triggers on 'are we unit-economic', 'what is our LTV/CAC'."
4
4
  status: active
5
5
  tier: senior
6
6
  source: package
7
7
  domain: product
8
+ context_spine: [product, fiscal-period]
8
9
  ---
9
10
 
10
11
  # unit-economics-modeling
@@ -14,9 +15,30 @@ domain: product
14
15
  - A board ask: "is this business unit-economic?" — needs CAC / LTV / payback, not vibes.
15
16
  - A new channel is scaling and the question is whether the CAC payback period is sustainable.
16
17
  - A pricing or packaging change needs to be tested against contribution margin per cohort.
18
+ - A finance-partner needs to construct burn-multiple cognition before the next forecast or scenario pass.
17
19
 
18
20
  Do NOT use for full-business intrinsic-value modeling, OKR setting, funnel-stage diagnosis, or backlog ranking (see Related Skills).
19
21
 
22
+ ## Cognition cluster
23
+
24
+ - **Mental model 1 — First principles.** Strip the unit to one paying
25
+ customer and one fully-loaded acquisition dollar. Aggregate ratios
26
+ ride on per-unit truth; if the unit is mis-defined (trial vs paid,
27
+ household vs seat), every ratio downstream is decoration. See
28
+ [`docs/contracts/mental-models.md`](../../../docs/contracts/mental-models.md) § 1.
29
+ - **Mental model 8 — Second-order thinking.** A CAC drop driven by
30
+ discounting lifts LTV/CAC on paper while shortening cohort
31
+ retention — the second-order effect lands two quarters later in
32
+ churn. Score the second-order cost of every lever, not just the
33
+ first-order ratio. See `mental-models.md` § 8.
34
+ - **Context-spine — product + fiscal-period.** Read the **product**
35
+ slot for what a "customer" actually is in this scope (seat vs
36
+ household vs paid trial vs activated free), and the
37
+ **fiscal-period** slot for the close-window the ratios must
38
+ reconcile against (monthly close vs quarterly board pack vs
39
+ annual plan). See
40
+ [`context-spine`](../../../docs/contracts/context-spine.md).
41
+
20
42
  ## Procedure
21
43
 
22
44
  ### Step 0: Inspect
@@ -51,13 +73,30 @@ Do NOT use for full-business intrinsic-value modeling, OKR setting, funnel-stage
51
73
  2. **LTV / CAC ratio**: target ≥ 3.0. Below 1.5 is acquisition-loss territory; above 5.0 means under-investment in growth (or bad LTV math).
52
74
  3. Both numbers, not one. Payback drives capital efficiency; ratio drives long-run economics.
53
75
 
54
- ### Step 5: Cohort the answer
55
-
56
- 1. Run Steps 1–4 by signup-quarter cohort. Trends matter more than the point estimate.
76
+ ### Step 5: Compute burn-multiple judgment
77
+
78
+ 1. **Burn multiple** = `net burn / net new ARR` over the fiscal-period
79
+ slot's reporting window (monthly close / quarterly / annual).
80
+ It answers *"how many dollars of cash do we burn to add one
81
+ dollar of recurring revenue?"* — a single ratio that compresses
82
+ CAC, gross margin, and churn into capital efficiency.
83
+ 2. Compute on **net** new ARR (gross new − churn − contraction).
84
+ Burn-multiple on gross new ARR flatters the picture by exactly
85
+ the churned and contracted ARR it leaves out; auditors and acquirers will recompute.
86
+ 3. Read the ratio against the org-stage colour from the
87
+ **fiscal-period** + product spine — do not hardcode a band here.
88
+ The cognition is *"smaller is better, and the direction across
89
+ cohorts matters more than the point estimate."* Bands belong in
90
+ `runway-cognition` (O3) where stage context is the load-bearing
91
+ input.
92
+
93
+ ### Step 6: Cohort the answer
94
+
95
+ 1. Run Steps 1–5 by signup-quarter cohort. Trends matter more than the point estimate.
57
96
  2. If LTV/CAC is improving but payback is lengthening, you are buying retention with discounting — flag.
58
97
  3. If both deteriorate, the channel mix has shifted to a worse channel — segment by channel to find the leak.
59
98
 
60
- ### Step 6: Validate
99
+ ### Step 7: Validate
61
100
 
62
101
  1. Sanity-check LTV against revenue retention. If implied LTV > 8× annual revenue per customer with monthly churn > 2%, the math is wrong.
63
102
  2. Sanity-check CAC against fully-loaded P&L. If channel CACs sum to less than total acquisition spend, allocations are missing.
@@ -88,6 +127,13 @@ Do NOT use for full-business intrinsic-value modeling, OKR setting, funnel-stage
88
127
  - Diagnosing where conversion drops — route to [`funnel-analysis`](../funnel-analysis/SKILL.md).
89
128
  - Ranking competing initiatives — route to [`rice-prioritization`](../rice-prioritization/SKILL.md).
90
129
  - Setting team objectives that move these metrics — route to [`okr-tree-modeling`](../okr-tree-modeling/SKILL.md).
130
+ - Cash-runway shape, fundraise-trigger heuristics, or layoff-vs-cut-vs-grow framing — route to [`runway-cognition`](../runway-cognition/SKILL.md) (O3).
131
+ - Multi-statement scenario construction over base / upside / downside — route to [`scenario-modeling`](../scenario-modeling/SKILL.md) (O4).
132
+ - Forecast-call construction (commit / best-case / pipeline) — route to [`forecasting`](../forecasting/SKILL.md) (O2).
133
+
134
+ Wing-4 handoff: this skill ships the `unit-economics-frame.md`
135
+ artifact that `scenario-modeling` (O4) reads as its money input
136
+ (`docs/guidelines/wing4-handoff.md` § Chain 1).
91
137
 
92
138
  ## When the agent should load this
93
139
 
@@ -99,7 +145,8 @@ Do NOT use for full-business intrinsic-value modeling, OKR setting, funnel-stage
99
145
 
100
146
  ## Output
101
147
 
102
- 1. **`unit-econ-table.md`** — table per channel and blended: CAC · ARPA · gross margin · payback months · LTV · LTV/CAC. With cohort columns (last 4 quarters).
148
+ 1. **`unit-econ-table.md`** — table per channel and blended: CAC · ARPA · gross margin · payback months · LTV · LTV/CAC · burn-multiple. With cohort columns (last 4 quarters).
103
149
  2. **`assumptions.md`** — formula chosen (SaaS / marketplace / transactional), churn definition, COGS allocation method, lifetime cap. One bullet per choice.
104
- 3. **`cohort-trend.md`** — trend chart (ASCII or markdown table) of CAC, payback, LTV/CAC over the last 4–8 cohorts. Annotate channel-mix shifts.
150
+ 3. **`cohort-trend.md`** — trend chart (ASCII or markdown table) of CAC, payback, LTV/CAC, burn-multiple over the last 4–8 cohorts. Annotate channel-mix shifts.
105
151
  4. **`sanity-checks.md`** — explicit cross-checks (LTV vs annual revenue, channel CAC sum vs P&L). Flag any that fail with a one-line investigation pointer.
152
+ 5. **`unit-economics-frame.md`** *(Wing-4 handoff)* — the typed artifact `scenario-modeling` (O4) reads: CAC / LTV ratio, contribution margin, payback band, burn-multiple verdict, segment scope, fiscal-period the frame reconciles against. Per `docs/guidelines/wing4-handoff.md` § Chain 1.
@@ -0,0 +1,146 @@
1
+ ---
2
+ name: vision-articulation
3
+ description: "Use when articulating internal vision — where we're going / why now / why us, founder-mode anchor, distinct from fundraising pitch. Triggers on 'what's our vision', 'why are we doing this'."
4
+ status: active
5
+ tier: senior
6
+ source: package
7
+ domain: process
8
+ context_spine: [org-stage, product, customer-segment]
9
+ ---
10
+
11
+ # vision-articulation
12
+
13
+ ## When to use
14
+
15
+ - An internal anchor is needed — board off-site, all-hands, strategy doc — and the question is *where are we going, why now, why us*. The audience is the team, not investors.
16
+ - A founder or strategist is sense-checking whether the current direction still holds, or whether reality has diverged from the stated vision.
17
+ - A new hire / new exec needs the founder-mode read of the company — not the fundraise pitch, the durable internal frame.
18
+
19
+ Do NOT use for outward-facing fundraise pitch (route to Wing-3 `fundraising-narrative` (H7); vision is internal-anchor, fundraising is external-pitch — different audiences, different proof bars), positioning copy / launch narrative (route to Wing-3 `positioning-strategy` / `messaging-architecture`), or roadmap construction (route to `feature-roadmap` workflows; vision constrains roadmaps, doesn't replace them).
20
+
21
+ ## Cognition cluster
22
+
23
+ - **Mental model 27 — Why now.** Vision without a *why now* is a wishlist. The market shift, technology wave, regulatory change, or demographic inflection that makes the vision *achievable in this window* is the load-bearing claim. See [`mental-models.md`](../../../docs/contracts/mental-models.md) § 27.
24
+ - **Mental model 1 — First principles.** Strip the vision to outcomes for a named cohort, not feature lists or company milestones. *"Be the X for Y"* is shape; *"customer C does outcome O 10× faster"* is substance. See `mental-models.md` § 1.
25
+ - **Mental model 21 — Second-order thinking.** *"If this vision is realised, what does the world look like in 5 years?"* If the answer is just *"we got bigger"*, the vision isn't load-bearing; if the answer is structural change (workflow shift, market re-shape), it is. See `mental-models.md` § 21.
26
+ - **Context-spine — org-stage + product + customer-segment.** Read **org-stage** for vision-horizon (pre-seed = 3-year survival vision; growth = 5–7-year market-shape vision; mature = 10-year). Read **product** for the realistic shipping shape. Read **customer-segment** for the cohort whose outcome anchors the vision.
27
+
28
+ ## Procedure
29
+
30
+ ### Step 0: Identify stance — founder vs operator
31
+
32
+ Vision-articulation is a founder-mode act, not an operator-mode act (per council Q6). Before starting:
33
+
34
+ 1. Confirm the requester is in founder-stance — they are setting the durable frame, not optimising within an existing frame.
35
+ 2. If the requester is in operator-stance (planning, executing, refining within a fixed frame), this skill is wrong; route to roadmap / planning skills.
36
+
37
+ Founder-stance is the precondition; without it the output is mis-shaped.
38
+
39
+ ### Step 1: Frame "where we're going" — cohort + outcome + horizon
40
+
41
+ One sentence: *"In [horizon], [customer-segment] [does outcome O] [because of structural change S]."*
42
+
43
+ Anti-patterns to reject:
44
+
45
+ 1. *"We're the [Big Company] of [Vertical]"* — feature-comparison, not cohort outcome. Reject.
46
+ 2. *"We're building the future of X"* — generic, no cohort named. Reject.
47
+ 3. *"We're growing to $100M ARR"* — milestone, not vision. Reject.
48
+
49
+ The sentence must name *who*, *what they do that they can't do today*, *why it matters in [horizon]*.
50
+
51
+ ### Step 2: Construct "why now"
52
+
53
+ Name 2–3 inflections that make this vision achievable in this window — not next decade, not last decade:
54
+
55
+ 1. **Technology inflection** — capability newly cheap / newly possible (AI, edge compute, new platform).
56
+ 2. **Market inflection** — buyer behaviour shift, segment opening, incumbent vulnerability (M&A churn, regulatory burden, talent loss).
57
+ 3. **Demographic / regulatory inflection** — workforce shift, policy change, generational handover.
58
+
59
+ Each inflection must be *recent* (last 24–36 months) or *imminent* (next 24 months). Inflections from 10 years ago are settled; inflections 10 years out are speculative. The *why now* window is narrow.
60
+
61
+ ### Step 3: Construct "why us"
62
+
63
+ Name 2–3 reasons this team, not another, can execute the vision. Honesty test:
64
+
65
+ 1. **Unique capability** — composes `competitive-moat-analysis` (P3); cite the moat dimensions where we score strong with evidence.
66
+ 2. **Unique distribution** — channel, community, partnership we have and competitors structurally can't replicate.
67
+ 3. **Unique insight** — founder / team origin gives us a read on the cohort that competitors don't have.
68
+
69
+ If the answer is *"we're hardworking"* or *"we move fast"*, the answer is none. Most teams are. Re-run.
70
+
71
+ ### Step 4: Inversion — what would falsify the vision?
72
+
73
+ For each load-bearing claim (cohort outcome, why-now inflection, why-us reason), write:
74
+
75
+ 1. *"What evidence in the next 12 months would falsify this?"* — leading signal that the vision is wrong-shaped.
76
+ 2. *"What change in the world would make this vision obsolete?"* — exogenous shift we're betting against.
77
+
78
+ A vision that can't be falsified is a slogan. Concrete falsifiers = real vision.
79
+
80
+ ### Step 5: Validate the vision before emitting
81
+
82
+ Before producing the artifact, verify three things:
83
+
84
+ 1. **Cohort-outcome specificity** — confirm the Step-1 sentence names a specific cohort + specific outcome + specific horizon; generic phrasing fails and must be re-run.
85
+ 2. **Why-now timestamp** — assert each Step-2 inflection is dated within the recent (last 24–36 months) or imminent (next 24 months) window; un-timestamped inflections are claims, not inflections.
86
+ 3. **Why-us falsifiability** — check that each Step-3 reason has a named Step-4 falsifier; un-falsifiable reasons are slogans and must be demoted.
87
+
88
+ All three must pass. If any fails, return to the failing step.
89
+
90
+ ### Step 6: Emit the vision frame
91
+
92
+ Produce the vision-frame artifact for internal use (board, all-hands, exec onboarding). Hand off to Wing-3 `fundraising-narrative` (H7) if an external pitch derivative is needed — that's a translation, not a copy.
93
+
94
+ ## Related Skills
95
+
96
+ **WHEN to use this**
97
+
98
+ - Internal vision articulation for board off-site, all-hands, strategy doc.
99
+ - Founder / exec sense-check on vision-vs-reality divergence.
100
+ - New-hire / new-exec onboarding to founder-mode read.
101
+
102
+ **WHEN NOT to use this**
103
+
104
+ - Outward-facing fundraise pitch — route to Wing-3 [`fundraising-narrative`](../fundraising-narrative/SKILL.md) (H7); vision is internal anchor, fundraising is external pitch.
105
+ - Positioning / messaging copy — route to Wing-3 [`positioning-strategy`](../positioning-strategy/SKILL.md) and [`messaging-architecture`](../messaging-architecture/SKILL.md).
106
+ - Moat reading — route to [`competitive-moat-analysis`](../competitive-moat-analysis/SKILL.md) (P3); this skill composes P3 for the "why us" frame.
107
+ - Roadmap / phase planning — route to roadmap workflows; vision constrains roadmaps, doesn't replace them.
108
+
109
+ ## When the agent should load this
110
+
111
+ - "What's our vision?"
112
+ - "Sense-check our direction — does this still hold?"
113
+ - "Draft the vision section for the board off-site."
114
+ - "Why now / why us frame for the strategy doc."
115
+ - "Wo wollen wir hin und warum jetzt?"
116
+
117
+ ## Output
118
+
119
+ 1. **`vision-frame.md`** — one-sentence vision (cohort + outcome + horizon), three "why now" inflections, three "why us" reasons.
120
+ 2. **`falsifiers.md`** — leading signals + exogenous shifts that would falsify each load-bearing claim.
121
+ 3. **`vision-vs-reality.md`** *(optional)* — when used as a sense-check, the delta between stated vision and current trajectory; named divergence points.
122
+
123
+ ## Gotcha
124
+
125
+ - "Be the X for Y" frames are positioning slogans, not vision. Reject; force cohort-outcome-horizon.
126
+ - "Why now" with no timestamp is a claim, not an inflection. Force a date.
127
+ - "Why us" answers like *"we're a great team"* count as no answer. Force unique capability / distribution / insight cited with evidence.
128
+ - Vision artifacts that no change in the world over 5 years could falsify are slogans. Real visions have falsifiers.
129
+
130
+ ## Do NOT
131
+
132
+ - Do NOT collapse vision into fundraise pitch — different audiences, different proof bars.
133
+ - Do NOT skip the inversion / falsifier step — a vision that has not been stress-tested against named falsifiers is a slogan.
134
+ - Do NOT bolt vision onto roadmap milestones — milestones are downstream of vision, not the same thing.
135
+
136
+ ## Runnable example
137
+
138
+ Series-A vertical SaaS, founder requests vision sense-check for board off-site.
139
+
140
+ - Step 0 — Stance: founder-mode confirmed (off-site context, setting durable frame).
141
+ - Step 1 — Vision: *"In 5 years, mid-size healthcare specialty groups (50–200 providers) deliver patient scheduling with 80 % less administrative load because the workflow is regulation-aware and self-adapting per state."*
142
+ - Step 2 — Why now: (a) state-licensure data became machine-readable in the last 24 months (tech inflection); (b) incumbent hospital-focused vendors hit their growth ceiling and are starting to mis-fit specialty groups (market inflection); (c) post-COVID, specialty-group practice owners control admin spend directly (buyer-behaviour shift, last 36 months).
143
+ - Step 3 — Why us: (a) founder's clinical-ops background + customer-segment access (unique insight, cited with 12 customer interviews); (b) switching-cost moat from 24-month deployment depth (composes P3, cited evidence); (c) HIPAA + 50-state licensure capability stack (unique capability).
144
+ - Step 4 — Falsifiers: (a) incumbent ships specialty-group focused migration tooling = unique-capability claim erodes; (b) state-licensure data becomes opaque again (regulatory rollback) = tech inflection inverts; (c) specialty-group consolidation accelerates so 50–200 cohort shrinks = cohort scope shrinks.
145
+ - Step 5 — Validate: cohort + outcome + horizon present; why-now inflections all dated within 24–36 months; why-us reasons each have falsifier. Pass.
146
+ - Step 6 — Emit vision-frame for board off-site; flag potential H7 derivative needed for the upcoming Series-B narrative.
@@ -39,7 +39,7 @@ schema_version: 1
39
39
  # CI guard: a release bump of `package.json` must update this value
40
40
  # in lockstep — see scripts/check_template_pin_drift.py (road-to-
41
41
  # portable-runtime-and-update-check P3.3).
42
- agent_config_version: "2.7.0"
42
+ agent_config_version: "2.8.0"
43
43
 
44
44
  # --- Project identity ---
45
45
  project:
@@ -18,6 +18,12 @@ DEFAULT_LOG_PATH = Path(".agent-engagement.jsonl")
18
18
  DEFAULT_GRANULARITY = "task"
19
19
  ALLOWED_GRANULARITIES = ("task", "phase-step", "tool-call")
20
20
 
21
+ #: Defaults for the tier-usage signal (Phase 5 of road-to-surface-discipline).
22
+ #: Separate file, separate opt-in, same default-off posture. Contract:
23
+ #: ``docs/contracts/command-clusters.md#tier-usage-signal-contract``.
24
+ DEFAULT_TIER_USAGE_LOG_PATH = Path(".agent-tier-usage.jsonl")
25
+ DEFAULT_TIER_USAGE_RETIER = {"window_days": 30, "min_invocations": 20, "min_distinct_users": 3}
26
+
21
27
 
22
28
  @dataclass(frozen=True)
23
29
  class TelemetrySettings:
@@ -104,9 +110,68 @@ def read_settings(path: Path) -> TelemetrySettings:
104
110
  return settings
105
111
 
106
112
 
113
+ @dataclass(frozen=True)
114
+ class TierUsageSettings:
115
+ enabled: bool
116
+ log_path: Path
117
+ window_days: int
118
+ min_invocations: int
119
+ min_distinct_users: int
120
+
121
+
122
+ def read_tier_usage_settings(path: Path) -> TierUsageSettings:
123
+ """Return parsed tier-usage settings — never raises on missing data.
124
+
125
+ Sibling of :func:`read_settings`; reads the
126
+ ``telemetry.tier_usage`` namespace from ``.agent-settings.yml``.
127
+ Default-off, same parse-tolerant shape — a missing file, a missing
128
+ section, or PyYAML being absent all collapse to ``enabled=False``.
129
+ """
130
+ section: dict[str, Any] = {}
131
+ if path.is_file():
132
+ try:
133
+ import yaml # type: ignore[import-not-found]
134
+ except ImportError:
135
+ yaml = None # type: ignore[assignment]
136
+ if yaml is not None:
137
+ try:
138
+ raw = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
139
+ except Exception:
140
+ raw = {}
141
+ if isinstance(raw, dict):
142
+ tele = raw.get("telemetry")
143
+ if isinstance(tele, dict):
144
+ tu = tele.get("tier_usage")
145
+ if isinstance(tu, dict):
146
+ section = tu
147
+
148
+ output = section.get("output") if isinstance(section.get("output"), dict) else {}
149
+ retier = section.get("retier") if isinstance(section.get("retier"), dict) else {}
150
+ defaults = DEFAULT_TIER_USAGE_RETIER
151
+
152
+ def _coerce_int(value: Any, default: int) -> int:
153
+ if isinstance(value, bool):
154
+ return default
155
+ if isinstance(value, int) and value >= 0:
156
+ return value
157
+ return default
158
+
159
+ return TierUsageSettings(
160
+ enabled=_coerce_bool(section.get("enabled"), default=False),
161
+ log_path=_coerce_path(output.get("path"), DEFAULT_TIER_USAGE_LOG_PATH),
162
+ window_days=_coerce_int(retier.get("window_days"), defaults["window_days"]),
163
+ min_invocations=_coerce_int(retier.get("min_invocations"), defaults["min_invocations"]),
164
+ min_distinct_users=_coerce_int(retier.get("min_distinct_users"), defaults["min_distinct_users"]),
165
+ )
166
+
167
+
107
168
  __all__ = [
108
169
  "DEFAULT_GRANULARITY",
109
170
  "DEFAULT_LOG_PATH",
171
+ "DEFAULT_TIER_USAGE_LOG_PATH",
172
+ "DEFAULT_TIER_USAGE_RETIER",
110
173
  "TelemetrySettings",
174
+ "TierUsageSettings",
111
175
  "read_settings",
176
+ "read_tier_usage_settings",
112
177
  ]
@@ -0,0 +1,183 @@
1
+ #!/usr/bin/env python3
2
+ """Tier-usage report — aggregate the local tier-usage log into a frequency table.
3
+
4
+ Phase 5 Step 3 of road-to-surface-discipline. Reads the JSONL log
5
+ written by the dispatcher (default ``.agent-tier-usage.jsonl``; override
6
+ via ``telemetry.tier_usage.output.path``) and emits a per-command
7
+ frequency table grouped by tier, plus distinct ``user_hash`` counts.
8
+ Run-local-only; no upload, no remote aggregation.
9
+
10
+ Privacy floor mirrors the contract in
11
+ ``docs/contracts/command-clusters.md#tier-usage-signal-contract`` and
12
+ the four-layer enforcement model used by artefact-engagement telemetry.
13
+ Records that carry any field outside the contract whitelist are dropped
14
+ at the read gate — the report refuses to render leaked shapes rather
15
+ than re-emit them.
16
+
17
+ Usage:
18
+ python3 tier_usage_report.py # last 30d, table
19
+ python3 tier_usage_report.py --window-days 7 # last 7d
20
+ python3 tier_usage_report.py --window-days 0 # full log
21
+ python3 tier_usage_report.py --json # JSON for tooling
22
+ python3 tier_usage_report.py --log-path X.jsonl # archived snapshot
23
+
24
+ Exit codes:
25
+ 0 success or telemetry disabled (single header line)
26
+ 1 no records survived the privacy floor on a non-empty file
27
+ 2 IO error (permission denied; passed path missing)
28
+ """
29
+ from __future__ import annotations
30
+
31
+ import argparse
32
+ import json
33
+ import sys
34
+ from collections import defaultdict
35
+ from datetime import datetime, timedelta, timezone
36
+ from pathlib import Path
37
+ from typing import Any
38
+
39
+ from telemetry.settings import DEFAULT_TIER_USAGE_LOG_PATH, read_tier_usage_settings
40
+
41
+ #: Contract whitelist (see ``docs/contracts/command-clusters.md``).
42
+ ALLOWED_FIELDS = frozenset({"ts_bucket", "command", "tier", "outcome", "user_hash"})
43
+ ALLOWED_OUTCOMES = frozenset({"success", "error", "blocked"})
44
+
45
+
46
+ def _parse_record(raw: str) -> dict[str, Any] | None:
47
+ """Return a sanitized record or ``None`` when the line violates the floor."""
48
+ try:
49
+ obj = json.loads(raw)
50
+ except json.JSONDecodeError:
51
+ return None
52
+ if not isinstance(obj, dict):
53
+ return None
54
+ if not set(obj.keys()).issubset(ALLOWED_FIELDS):
55
+ return None
56
+ cmd = obj.get("command")
57
+ if not isinstance(cmd, str) or not cmd or "/" in cmd or "\\" in cmd:
58
+ return None
59
+ if not isinstance(obj.get("tier"), int) or obj["tier"] not in (0, 1, 2, 3):
60
+ return None
61
+ if obj.get("outcome") not in ALLOWED_OUTCOMES:
62
+ return None
63
+ uh = obj.get("user_hash")
64
+ if not isinstance(uh, str) or len(uh) != 16:
65
+ return None
66
+ if not isinstance(obj.get("ts_bucket"), str):
67
+ return None
68
+ return obj
69
+
70
+
71
+ def _within_window(ts_bucket: str, window_days: int | None) -> bool:
72
+ if window_days is None or window_days == 0:
73
+ return True
74
+ try:
75
+ ts = datetime.fromisoformat(ts_bucket.replace("Z", "+00:00"))
76
+ except ValueError:
77
+ return False
78
+ if ts.tzinfo is None:
79
+ ts = ts.replace(tzinfo=timezone.utc)
80
+ return ts >= datetime.now(timezone.utc) - timedelta(days=window_days)
81
+
82
+
83
+ def aggregate(
84
+ log_path: Path, window_days: int,
85
+ ) -> tuple[dict[tuple[int, str], dict[str, Any]], int, int]:
86
+ """Return ``((tier, command) -> stats, total_lines, kept)`` over the window."""
87
+ buckets: dict[tuple[int, str], dict[str, Any]] = defaultdict(
88
+ lambda: {"count": 0, "users": set()},
89
+ )
90
+ total = 0
91
+ kept = 0
92
+ if not log_path.exists():
93
+ return {}, 0, 0
94
+ with log_path.open("r", encoding="utf-8") as fh:
95
+ for line in fh:
96
+ line = line.strip()
97
+ if not line:
98
+ continue
99
+ total += 1
100
+ rec = _parse_record(line)
101
+ if rec is None:
102
+ continue
103
+ if not _within_window(rec["ts_bucket"], window_days):
104
+ continue
105
+ kept += 1
106
+ key = (int(rec["tier"]), rec["command"])
107
+ buckets[key]["count"] += 1
108
+ buckets[key]["users"].add(rec["user_hash"])
109
+ out = {k: {"count": v["count"], "distinct_users": len(v["users"])}
110
+ for k, v in buckets.items()}
111
+ return out, total, kept
112
+
113
+
114
+ def render(
115
+ table: dict[tuple[int, str], dict[str, Any]],
116
+ window_days: int,
117
+ ) -> str:
118
+ suffix = f" (last {window_days}d)" if window_days else " (full log)"
119
+ if not table:
120
+ return f"(no tier-usage records{suffix})\n"
121
+ rows = sorted(table.items(), key=lambda kv: (kv[0][0], -kv[1]["count"], kv[0][1]))
122
+ header = f"{'Tier':<6}{'Command':<32}{'Calls':>8}{'Users':>8}"
123
+ lines = [header, "-" * len(header)]
124
+ for (tier, command), stats in rows:
125
+ lines.append(
126
+ f"{tier:<6}{command:<32}{stats['count']:>8}{stats['distinct_users']:>8}",
127
+ )
128
+ lines.append(f"\n(window:{suffix.strip()})")
129
+ return "\n".join(lines) + "\n"
130
+
131
+
132
+ def main(argv: list[str] | None = None) -> int:
133
+ parser = argparse.ArgumentParser(description="Tier-usage frequency report.")
134
+ parser.add_argument("--window-days", type=int, default=30,
135
+ help="trailing window in days (0 = full log)")
136
+ parser.add_argument("--json", action="store_true",
137
+ help="emit JSON instead of the table")
138
+ parser.add_argument("--log-path", type=Path, default=None,
139
+ help="override settings; read an archived log")
140
+ parser.add_argument("--settings-file", type=Path, default=Path(".agent-settings.yml"))
141
+ args = parser.parse_args(argv)
142
+
143
+ settings = read_tier_usage_settings(args.settings_file)
144
+ log_path = args.log_path or settings.log_path or DEFAULT_TIER_USAGE_LOG_PATH
145
+
146
+ if args.log_path is None and not settings.enabled:
147
+ sys.stdout.write(
148
+ "(tier-usage telemetry disabled; set "
149
+ "`telemetry.tier_usage.enabled: true` in .agent-settings.yml)\n",
150
+ )
151
+ return 0
152
+
153
+ try:
154
+ table, total, kept = aggregate(log_path, args.window_days)
155
+ except OSError as exc:
156
+ print(f"❌ {exc}", file=sys.stderr)
157
+ return 2
158
+
159
+ if total > 0 and kept == 0:
160
+ print(f"❌ {total} record(s) read; 0 survived the privacy floor — "
161
+ "report refused", file=sys.stderr)
162
+ return 1
163
+
164
+ if args.json:
165
+ payload = {
166
+ "window_days": args.window_days,
167
+ "log_path": str(log_path),
168
+ "records_total": total,
169
+ "records_kept": kept,
170
+ "rows": [
171
+ {"tier": t, "command": c, "count": v["count"],
172
+ "distinct_users": v["distinct_users"]}
173
+ for (t, c), v in sorted(table.items(), key=lambda kv: (kv[0][0], kv[0][1]))
174
+ ],
175
+ }
176
+ sys.stdout.write(json.dumps(payload, indent=2) + "\n")
177
+ else:
178
+ sys.stdout.write(render(table, args.window_days))
179
+ return 0
180
+
181
+
182
+ if __name__ == "__main__":
183
+ sys.exit(main())
@@ -23,8 +23,11 @@ from __future__ import annotations
23
23
 
24
24
  from typing import Any, Iterable
25
25
 
26
+ from ...scoring.decision_trace import summarise_memory, summarise_verify
26
27
  from ...scoring.memory_visibility import (
27
28
  DEFAULT_ASKED_TYPES,
29
+ compute_affected,
30
+ format_changed_decisions_block,
28
31
  format_line,
29
32
  should_emit,
30
33
  summarise_visibility,
@@ -82,20 +85,46 @@ class MemoryVisibilityHook:
82
85
  visibility_off=self._visibility_off,
83
86
  ):
84
87
  return
85
- line = format_line(summary)
88
+ affected = self._derive_affected(work, memory)
89
+ line = format_line(summary, affected=affected)
86
90
  if not line:
87
91
  return
92
+ block = format_changed_decisions_block(
93
+ summary.get("ids") or [], affected,
94
+ )
88
95
  existing = getattr(work, "report", "") or ""
89
- if line in existing:
96
+ rendered = line if block is None else f"{line}\n\n{block}"
97
+ if line in existing and (block is None or block in existing):
90
98
  return
91
99
  sep = "\n\n" if existing else ""
92
100
  try:
93
- work.report = f"{existing}{sep}{line}"
101
+ work.report = f"{existing}{sep}{rendered}"
94
102
  except AttributeError as exc:
95
103
  raise HookError(
96
104
  "memory-visibility: state.report not writable",
97
105
  ) from exc
98
106
 
107
+ def _derive_affected(self, work: Any, memory: Any) -> list[str] | None:
108
+ """Compute the closed-list ``affected`` keys for this work step.
109
+
110
+ Reuses the decision-trace summarisers so the counterfactual
111
+ matches the trace hook's view of the same WorkState. Returns
112
+ ``None`` when memory was not consulted (hits == 0); callers
113
+ then omit the ``· affected: …`` segment per the contract.
114
+ """
115
+ memory_summary = summarise_memory(memory)
116
+ verify_summary = summarise_verify(getattr(work, "verify", None))
117
+ ambiguity = bool(getattr(work, "questions", None))
118
+ return compute_affected(
119
+ memory_hits=memory_summary["hits"],
120
+ verify_claims=verify_summary["claims"],
121
+ verify_first_try_passes=verify_summary["first_try_passes"],
122
+ ambiguity_flag=ambiguity,
123
+ changes=getattr(work, "changes", None),
124
+ applied_rules=getattr(work, "applied_rules", None),
125
+ test_plan=getattr(work, "test_plan", None),
126
+ )
127
+
99
128
 
100
129
  def derive_visibility(memory: Any) -> str | None:
101
130
  """Convenience helper: render the line directly from a memory list.