@event4u/agent-config 1.19.0 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/.agent-src/commands/agent-handoff.md +14 -10
  2. package/.agent-src/commands/chat-history/import.md +170 -0
  3. package/.agent-src/commands/chat-history/learn.md +178 -0
  4. package/.agent-src/commands/chat-history/show.md +17 -18
  5. package/.agent-src/commands/chat-history.md +26 -25
  6. package/.agent-src/commands/council/default.md +4 -7
  7. package/.agent-src/commands/create-pr.md +28 -8
  8. package/.agent-src/commands/sync-gitignore.md +1 -1
  9. package/.agent-src/contexts/communication/rules-auto/skill-quality-mechanics.md +76 -0
  10. package/.agent-src/contexts/communication/rules-auto/slash-command-routing-policy-mechanics.md +3 -3
  11. package/.agent-src/contexts/communication/rules-auto/user-interaction-mechanics.md +5 -12
  12. package/.agent-src/rules/direct-answers.md +10 -2
  13. package/.agent-src/rules/language-and-tone.md +37 -6
  14. package/.agent-src/rules/no-attribution-footers.md +48 -0
  15. package/.agent-src/rules/no-roadmap-references.md +1 -1
  16. package/.agent-src/rules/skill-quality.md +49 -0
  17. package/.agent-src/rules/user-interaction.md +21 -5
  18. package/.agent-src/skills/ai-council/SKILL.md +4 -5
  19. package/.agent-src/skills/dcf-modeling/SKILL.md +89 -0
  20. package/.agent-src/skills/funnel-analysis/SKILL.md +100 -0
  21. package/.agent-src/skills/md-language-check/SKILL.md +1 -1
  22. package/.agent-src/skills/okr-tree-modeling/SKILL.md +93 -0
  23. package/.agent-src/skills/rice-prioritization/SKILL.md +100 -0
  24. package/.agent-src/skills/subagent-orchestration/SKILL.md +34 -2
  25. package/.agent-src/skills/unit-economics-modeling/SKILL.md +104 -0
  26. package/.agent-src/skills/using-git-worktrees/SKILL.md +1 -0
  27. package/.agent-src/templates/agent-settings.md +5 -26
  28. package/.agent-src/templates/scripts/work_engine/hook_bootstrap.py +7 -5
  29. package/.agent-src/templates/scripts/work_engine/hooks/__init__.py +0 -4
  30. package/.agent-src/templates/scripts/work_engine/hooks/builtin/__init__.py +0 -4
  31. package/.agent-src/templates/scripts/work_engine/hooks/builtin/_chat_history_base.py +7 -51
  32. package/.agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_append.py +1 -2
  33. package/.agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_halt_append.py +1 -2
  34. package/.agent-src/templates/scripts/work_engine/hooks/builtin/memory_visibility.py +2 -3
  35. package/.agent-src/templates/skill.md +30 -1
  36. package/.claude-plugin/marketplace.json +8 -4
  37. package/AGENTS.md +44 -3
  38. package/CHANGELOG.md +111 -0
  39. package/README.md +6 -6
  40. package/config/agent-settings.template.yml +19 -13
  41. package/config/gitignore-block.txt +4 -4
  42. package/docs/architecture.md +3 -3
  43. package/docs/catalog.md +14 -12
  44. package/docs/contracts/adr-chat-history-split.md +10 -1
  45. package/docs/contracts/command-clusters.md +1 -1
  46. package/docs/contracts/cross-wing-handoff.md +133 -0
  47. package/docs/contracts/file-ownership-matrix.json +341 -126
  48. package/docs/contracts/hook-architecture-v1.md +8 -1
  49. package/docs/contracts/memory-visibility-v1.md +8 -24
  50. package/docs/customization.md +1 -1
  51. package/docs/getting-started.md +21 -29
  52. package/docs/guidelines/agent-infra/ask-when-uncertain-demos.md +1 -1
  53. package/docs/hook-payload-capture.md +221 -0
  54. package/docs/migrations/commands-1.15.0.md +17 -12
  55. package/docs/skills-catalog.md +5 -4
  56. package/llms.txt +4 -3
  57. package/package.json +1 -1
  58. package/scripts/agent-config +1 -1
  59. package/scripts/ai_council/_default_prices.py +4 -4
  60. package/scripts/ai_council/clients.py +1 -1
  61. package/scripts/ai_council/modes.py +3 -4
  62. package/scripts/ai_council/pricing.py +10 -9
  63. package/scripts/build_rule_trigger_matrix.py +1 -9
  64. package/scripts/chat_history.py +952 -596
  65. package/scripts/check_references.py +12 -2
  66. package/scripts/council_cli.py +54 -4
  67. package/scripts/hook_manifest.yaml +33 -0
  68. package/scripts/hooks/augment-chat-history.sh +10 -0
  69. package/scripts/hooks/cowork-dispatcher.sh +98 -0
  70. package/scripts/hooks/dispatch_hook.py +35 -0
  71. package/scripts/hooks_status.py +12 -1
  72. package/scripts/install-hooks.sh +2 -2
  73. package/scripts/install.sh +37 -0
  74. package/scripts/lint_handoffs.py +214 -0
  75. package/scripts/lint_hook_manifest.py +2 -1
  76. package/scripts/redact_hook_capture.py +148 -0
  77. package/scripts/schemas/skill.schema.json +5 -0
  78. package/scripts/skill_linter.py +163 -1
  79. package/scripts/update_prices.py +3 -3
  80. package/.agent-src/commands/chat-history/checkpoint.md +0 -126
  81. package/.agent-src/commands/chat-history/clear.md +0 -103
  82. package/.agent-src/commands/chat-history/resume.md +0 -183
  83. package/.agent-src/rules/chat-history-cadence.md +0 -143
  84. package/.agent-src/rules/chat-history-ownership.md +0 -124
  85. package/.agent-src/rules/chat-history-visibility.md +0 -97
  86. package/.agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_heartbeat.py +0 -50
  87. package/.agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_turn_check.py +0 -49
  88. package/scripts/check_phase_coupling.py +0 -148
@@ -0,0 +1,93 @@
1
+ ---
2
+ name: okr-tree-modeling
3
+ description: "Use when decomposing a company objective into team OKRs, auditing a draft OKR tree, or stress-testing an existing one for measurability and laddering."
4
+ status: active
5
+ tier: senior
6
+ source: package
7
+ ---
8
+
9
+ # okr-tree-modeling
10
+
11
+ ## When to use
12
+
13
+ - A leadership team wrote a quarterly objective and needs three measurable KRs that actually move it.
14
+ - A draft OKR tree needs review for orphan KRs, vanity metrics, or KRs that ladder to two parents.
15
+ - A team-level OKR set needs to be checked against the company objective it claims to serve.
16
+
17
+ Do NOT use for ranking competing initiatives within one OKR — that's prioritization, not decomposition (route elsewhere — see Related Skills).
18
+
19
+ ## Procedure
20
+
21
+ ### Step 0: Inspect
22
+
23
+ 1. Identify the cognition cluster. OKR-as-strategy lives near the CEO/COO; OKR-as-PM-tool lives near the head of product. The decomposition mechanic is the same; the ladder-up target differs.
24
+ 2. Confirm timeframe (quarter / half / year) — KR cadence depends on it.
25
+
26
+ ### Step 1: Lock the parent objective
27
+
28
+ 1. Restate the parent in one sentence with **a verb of change** (grow, reduce, ship, win) and an outcome — not an output.
29
+ 2. Bad: "Improve onboarding." Good: "Reduce time-to-first-value for new accounts to under 7 days."
30
+ 3. If the parent has no verb of change or no outcome, the tree is built on sand. Stop and rewrite the parent.
31
+
32
+ ### Step 2: Decompose into 3 KRs
33
+
34
+ 1. Each KR must satisfy four tests: (a) measurable end-state, (b) the team owns the lever, (c) achievable iff the parent is achieved, (d) failing it should be visibly bad.
35
+ 2. Three is the floor and the ceiling. One KR makes the objective brittle; five dilutes ownership.
36
+ 3. Anti-pattern: "ship X feature" as a KR. Shipping is an output. The KR is what changes when the feature lands.
37
+
38
+ ### Step 3: Cascade to team-level
39
+
40
+ 1. For each company KR, define 2–3 team-level KRs that, taken together, achieve the parent KR — not duplicate it.
41
+ 2. Anti-pattern: copy-paste the parent KR with a smaller number ("60% of company KR is 30% for our team"). Real cascades change shape — input metrics for some teams, leading indicators for others.
42
+ 3. Mark each team KR as **trailing** (lagging outcome — revenue, retention) or **leading** (input the team controls — activation rate, response time). A tree that is all-trailing is unactionable.
43
+
44
+ ### Step 4: Cadence + check-ins
45
+
46
+ 1. Weekly check-in: leading KRs only. Trailing KRs are reviewed monthly.
47
+ 2. Confidence score (0.0–1.0) per KR, written 3× across the period (start / mid / end) to surface drift.
48
+
49
+ ### Step 5: Validate
50
+
51
+ 1. Walk the tree top-down: does every leaf KR ladder cleanly to exactly one parent KR?
52
+ 2. Walk it bottom-up: if every team hits 0.7 confidence, does the company objective land?
53
+ 3. Count vanity metrics — KRs that move but don't matter. If more than zero, rewrite.
54
+
55
+ ## Gotcha
56
+
57
+ - "100% of teams adopt X" is a participation metric, not an outcome KR. The model loves to write these.
58
+ - A KR owned by two teams is owned by no team. Single accountability is non-negotiable.
59
+ - Stretch goals that nobody believes are achievable produce sandbagging in the next OKR cycle, not stretch.
60
+ - "Customer happiness" + NPS is a sentiment proxy, not an outcome metric. Tie KRs to behavior (renewal, expansion, usage), not feeling.
61
+
62
+ ## Do NOT
63
+
64
+ - Do NOT write more than 3 KRs per objective — focus is the entire point.
65
+ - Do NOT use "achieve $X revenue" as a KR for a team that doesn't own pricing or pipeline; that sets them up to take the blame for a miss they cannot control.
66
+ - Do NOT cascade by simple percentage allocation — different teams contribute different mechanisms, not different fractions.
67
+
68
+ ## Related Skills
69
+
70
+ **WHEN to use this**
71
+
72
+ - The ask is decomposition of an objective into measurable KRs.
73
+ - A draft OKR tree needs structural review (cascade integrity, leading/trailing balance).
74
+
75
+ **WHEN NOT to use this**
76
+
77
+ - Prioritization of competing features inside one KR — route to [`rice-prioritization`](../rice-prioritization/SKILL.md).
78
+ - Conversion-rate diagnosis on a funnel KR — route to [`funnel-analysis`](../funnel-analysis/SKILL.md).
79
+ - Valuation-impact modeling of strategic objectives — route to [`dcf-modeling`](../dcf-modeling/SKILL.md).
80
+
81
+ ## When the agent should load this
82
+
83
+ - "Help me write OKRs for next quarter."
84
+ - "Review this OKR tree — does it ladder up?"
85
+ - "What KRs would actually move our retention objective?"
86
+ - "Are these team OKRs measurable?"
87
+ - "We have 8 KRs per team — too many?"
88
+
89
+ ## Output
90
+
91
+ 1. **`okr-tree.md`** — markdown tree: company objective → 3 company KRs → 2–3 team KRs each. Each leaf carries owner, measure, target, trailing/leading tag, check-in cadence.
92
+ 2. **`cascade-audit.md`** — orphans (KRs with no parent), oversubscribed parents (KR with >3 children), vanity metrics flagged with a one-line replacement suggestion.
93
+ 3. **`confidence-template.md`** — empty 3-column table (start / mid / end) per KR, ready for the period's confidence updates.
@@ -0,0 +1,100 @@
1
+ ---
2
+ name: rice-prioritization
3
+ description: "Use when ranking competing initiatives for a roadmap, breaking a tie between two features, or auditing a backlog for hidden low-value work via Reach × Impact × Confidence ÷ Effort."
4
+ status: active
5
+ tier: senior
6
+ source: package
7
+ ---
8
+
9
+ # rice-prioritization
10
+
11
+ ## When to use
12
+
13
+ - A backlog has more candidates than capacity for the next quarter and someone has to pick.
14
+ - A PM and an engineering lead disagree on what ships first and need a shared framework.
15
+ - A draft roadmap reads like a wish list — no transparency on **why** these and not those.
16
+
17
+ Do NOT use for valuation, OKR decomposition, or funnel-stage diagnosis (see Related Skills).
18
+
19
+ ## Procedure
20
+
21
+ ### Step 0: Inspect
22
+
23
+ 1. Confirm there are at least 5 candidates. RICE on 2 items is theatre; just argue the merits.
24
+ 2. Confirm there is a shared definition of the **target user** for "Reach" — RICE breaks if two scorers count different populations.
25
+
26
+ ### Step 1: Score Reach
27
+
28
+ 1. Reach = number of users / events / requests **per fixed time window** (per quarter is the default).
29
+ 2. Use absolute counts pulled from analytics or product DB, not percentages — percentages hide tiny denominators.
30
+ 3. If the data isn't there, write the query you'd run and say so. Do not invent numbers.
31
+
32
+ ### Step 2: Score Impact
33
+
34
+ 1. Use the canonical 5-point scale: 0.25 (minimal) · 0.5 (low) · 1 (medium) · 2 (high) · 3 (massive).
35
+ 2. Anchor each level with a concrete past shipped feature ("medium = like the search filter we shipped Q2"). Without anchors, scorers drift.
36
+ 3. Impact is **per affected user**, not aggregate. Aggregate is what RICE produces, not what you input.
37
+
38
+ ### Step 3: Score Confidence
39
+
40
+ 1. Confidence is a **percentage** — 100 / 80 / 50 / "low and we should not score this yet".
41
+ 2. Anything below 50 means: stop, do a spike or a research week, then re-score. RICE does not rescue ignorance.
42
+ 3. Confidence multiplies — it is the model's discount for unknown unknowns.
43
+
44
+ ### Step 4: Score Effort
45
+
46
+ 1. Effort = person-months for the smallest viable shippable slice. Not the fantasy version.
47
+ 2. Engineering owns this number. PMs scoring effort is the most common process failure.
48
+ 3. Effort < 0.5 person-months almost always means scope is underestimated — surface and ask.
49
+
50
+ ### Step 5: Compute and rank
51
+
52
+ 1. RICE = `(Reach × Impact × Confidence) / Effort`.
53
+ 2. Rank descending. The score is the artefact, not the answer — read the top 5 with a critical eye.
54
+ 3. Anti-pattern: treating RICE rank as a contract. It is a structured argument, not a verdict.
55
+
56
+ ### Step 6: Audit the bottom
57
+
58
+ 1. Look at the bottom quartile. If a strategic must-have lives there, the model has a calibration error — usually Reach or Impact.
59
+ 2. Look at the top item. If it is obviously absurd (e.g. one ad-hoc admin tool above a strategic platform play), the input scoring is uncalibrated.
60
+
61
+ ## Gotcha
62
+
63
+ - Reach in percentages hides "this feature affects 100% of … 12 users."
64
+ - Impact inflation: every PM thinks every feature is a 2 or 3. Force at least 30% of items to score 0.5 or below.
65
+ - Confidence is the only multiplier that punishes uncertainty — do not let it default to 80 for everything.
66
+ - Effort discrepancy between PM and engineering on the same row is itself the signal — investigate, do not average.
67
+
68
+ ## Do NOT
69
+
70
+ - Do NOT rank fewer than 5 candidates with RICE — overhead exceeds value.
71
+ - Do NOT mix strategic bets and BAU tickets in the same RICE table; their effort scales differ by 10×.
72
+ - Do NOT ship a roadmap that is exactly the RICE-sorted top-N — you need at least one strategic outlier with a written rationale.
73
+
74
+ ## Related Skills
75
+
76
+ **WHEN to use this**
77
+
78
+ - Ranking is the actual question.
79
+ - The team needs a shared, auditable scoring frame.
80
+
81
+ **WHEN NOT to use this**
82
+
83
+ - Decomposing an objective into KRs — route to [`okr-tree-modeling`](../okr-tree-modeling/SKILL.md).
84
+ - Diagnosing why a funnel stage drops — route to [`funnel-analysis`](../funnel-analysis/SKILL.md).
85
+ - Modelling whether an investment is worth its capital cost — route to [`dcf-modeling`](../dcf-modeling/SKILL.md).
86
+ - CAC / LTV / payback questions — route to [`unit-economics-modeling`](../unit-economics-modeling/SKILL.md).
87
+
88
+ ## When the agent should load this
89
+
90
+ - "Help me prioritize the backlog for Q3."
91
+ - "RICE-score these features."
92
+ - "Why is X above Y on the roadmap?"
93
+ - "We have 30 ideas and 6 engineers — what ships?"
94
+ - "Audit our roadmap for low-value work."
95
+
96
+ ## Output
97
+
98
+ 1. **`rice-table.md`** — markdown table: Item · Reach · Impact · Confidence · Effort · RICE · Owner · Notes. Sorted descending by RICE.
99
+ 2. **`calibration-notes.md`** — one paragraph per anchor (what "Impact = 2" means with a named past feature) plus a list of items with confidence < 50 marked for spike-first.
100
+ 3. **`top-5-critique.md`** — one paragraph per top-5 item: is the rank defensible, and what would change it.
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: subagent-orchestration
3
- description: "Use when orchestrating implementer/judge subagents — five modes (do-and-judge, do-in-steps, do-in-parallel, do-competitively, judge-with-debate) — models from .agent-settings.yml."
3
+ description: "Use when orchestrating implementer/judge subagents — six modes (do-and-judge, do-in-steps, do-in-parallel, do-competitively, judge-with-debate, do-in-worktrees) — models from .agent-settings.yml."
4
4
  source: package
5
5
  ---
6
6
 
@@ -44,7 +44,7 @@ judge is a fresh pair of eyes. If `.agent-settings.yml` resolves to
44
44
  identical implementer and judge models, surface the mismatch before
45
45
  running — do not silently continue.
46
46
 
47
- ## The five modes
47
+ ## The six modes
48
48
 
49
49
  Each mode has a decision row: when to use, when not, and the expected
50
50
  model pairing. Defaults come from
@@ -100,6 +100,38 @@ migration, public API) where a single judge is too easy to fool.
100
100
  |---|---|---|
101
101
  | Security, data integrity, public API change | Routine internal refactor | judges = same tier (2x); meta-judge = one tier up |
102
102
 
103
+ ### 6. do-in-worktrees
104
+
105
+ Cross-wing or cross-skill chain executed across isolated git
106
+ worktrees — each handoff in the chain runs in its own worktree, so
107
+ the workspace state of one step never leaks into the next. Operationalizes
108
+ the worktree boundary clause in
109
+ [`docs/contracts/cross-wing-handoff.md`](../../../docs/contracts/cross-wing-handoff.md)
110
+ § 3. State-machine layer only — worktree creation/destruction lives
111
+ in [`using-git-worktrees`](../using-git-worktrees/SKILL.md) and
112
+ [`finishing-a-development-branch`](../finishing-a-development-branch/SKILL.md).
113
+
114
+ | When to use | When not | Model pairing |
115
+ |---|---|---|
116
+ | Multi-step cross-wing chain (≥2 senior skills, each ≥30 min) where one step's open files / branch state would confuse the next | Fast iteration where each step < 30 min — worktree overhead exceeds isolation benefit | implementers = same tier per step; judge = one tier up at chain end |
117
+
118
+ **Handoff shape:** initiator-skill emits the typed output declared in
119
+ its `## Output` block → control passes to delegated-skill in a fresh
120
+ worktree → delegated-skill consumes the input shape declared in its
121
+ `## Input` (or `## When the agent should load this`) block. The
122
+ handoff is auditable; `lint_handoffs.py` validates the chain.
123
+
124
+ **Example chain (W3 launch):** `positioning` (worktree A) →
125
+ `messaging-architecture` (worktree B, consumes positioning's
126
+ `positioning-statement.md`) → `gtm-launch` (worktree C, consumes
127
+ both prior artifacts). Each worktree carries one branch; the chain
128
+ end produces a single integration PR.
129
+
130
+ **Anti-pattern:** do not use for fast iteration loops where each
131
+ step is under ~30 minutes. The branch-creation, context-switch, and
132
+ worktree-cleanup cost dominates. Stick with mode 1 (do-and-judge)
133
+ or mode 2 (do-in-steps) for those.
134
+
103
135
  ## Procedure
104
136
 
105
137
  ### 1. Inspect the task shape
@@ -0,0 +1,104 @@
1
+ ---
2
+ name: unit-economics-modeling
3
+ description: "Use when modeling CAC, LTV, gross-margin payback, or contribution margin per customer — for SaaS, marketplace, or transactional businesses."
4
+ status: active
5
+ tier: senior
6
+ source: package
7
+ ---
8
+
9
+ # unit-economics-modeling
10
+
11
+ ## When to use
12
+
13
+ - A board ask: "is this business unit-economic?" — needs CAC / LTV / payback, not vibes.
14
+ - A new channel is scaling and the question is whether the CAC payback period is sustainable.
15
+ - A pricing or packaging change needs to be tested against contribution margin per cohort.
16
+
17
+ Do NOT use for full-business intrinsic-value modeling, OKR setting, funnel-stage diagnosis, or backlog ranking (see Related Skills).
18
+
19
+ ## Procedure
20
+
21
+ ### Step 0: Inspect
22
+
23
+ 1. Confirm the business shape — SaaS / marketplace / transactional. The three canonical cases differ in **revenue recognition** and **churn definition**, not in arithmetic.
24
+ 2. Confirm a fully-loaded CAC is computable: paid spend + sales comp + content/SEO allocation + tooling. Marketing-spend-only CAC is a vanity metric.
25
+
26
+ ### Step 1: Compute CAC per channel
27
+
28
+ 1. CAC = `(fully-loaded acquisition spend in window) / (new paying customers acquired in same window)`. Match window to sales-cycle length, not calendar quarter.
29
+ 2. Compute by channel **and** blended. Blended-only hides the channel that is breaking the average.
30
+ 3. Anti-pattern: counting trial signups as customers. Customer = first paid charge cleared.
31
+
32
+ ### Step 2: Compute gross margin
33
+
34
+ 1. Gross margin = `(revenue − COGS) / revenue`. COGS includes hosting, payment fees, third-party APIs the customer's usage drives, and direct customer-success cost.
35
+ 2. Gross margin must be **per dollar of revenue**, not per customer. Per-customer gross margin is contribution margin (Step 3).
36
+ 3. SaaS healthy band: 70–85%. Marketplace: 15–40%. Transactional: 5–25%. Outside these — the business is mislabelled or the COGS allocation is wrong.
37
+
38
+ ### Step 3: Compute LTV
39
+
40
+ 1. Pick the canonical formula for the case:
41
+ - **SaaS:** `LTV = ARPA × gross_margin / monthly_churn_rate`. Use net-dollar churn for self-serve, gross logo churn for high-touch.
42
+ - **Marketplace:** `LTV = take_rate × GMV_per_user × retention_curve_AUC` over 24 months. Steady-state extrapolation is dishonest below 24 months of cohort data.
43
+ - **Transactional:** `LTV = avg_order_value × gross_margin × purchases_per_year × avg_lifetime_years`.
44
+ 2. Cap implied lifetime at 5 years for any business with < 3 years of cohort history. Anything longer is a fairy tale.
45
+ 3. State the formula used inline. Do not let the reader infer.
46
+
47
+ ### Step 4: Compute payback and ratio
48
+
49
+ 1. **CAC payback** (months) = `CAC / (ARPA × gross_margin)` for SaaS; analogue for marketplace and transactional. Healthy SaaS: ≤ 12 months.
50
+ 2. **LTV / CAC ratio**: target ≥ 3.0. Below 1.5 is acquisition-loss territory; above 5.0 means under-investment in growth (or bad LTV math).
51
+ 3. Both numbers, not one. Payback drives capital efficiency; ratio drives long-run economics.
52
+
53
+ ### Step 5: Cohort the answer
54
+
55
+ 1. Run Steps 1–4 by signup-quarter cohort. Trends matter more than the point estimate.
56
+ 2. If LTV/CAC is improving but payback is lengthening, you are buying retention with discounting — flag.
57
+ 3. If both deteriorate, the channel mix has shifted to a worse channel — segment by channel to find the leak.
58
+
59
+ ### Step 6: Validate
60
+
61
+ 1. Sanity-check LTV against revenue retention. If implied LTV > 8× annual revenue per customer with monthly churn > 2%, the math is wrong.
62
+ 2. Sanity-check CAC against fully-loaded P&L. If per-channel acquisition spend (channel CAC × customers acquired) sums to less than total acquisition spend, allocations are missing.
63
+
64
+ ## Gotcha
65
+
66
+ - Marketing-spend-only CAC is the most common deception. Sales comp, BDR salaries, content production, and tooling all belong in fully-loaded CAC.
67
+ - Net-dollar retention > 100% does not justify ignoring logo churn — they answer different questions.
68
+ - ARPA averaged across plan tiers hides churn concentrated in one tier. Compute per tier when tiers differ in price by more than 2×.
69
+ - Payback period using contribution margin (post variable-cost) is honest; payback using gross revenue is the kind of math VCs see in pitch decks and discount on sight.
70
+
71
+ ## Do NOT
72
+
73
+ - Do NOT extrapolate LTV beyond observable cohort data without saying so explicitly.
74
+ - Do NOT mix freemium activation rates with paid CAC; they live in different universes.
75
+ - Do NOT report a single LTV/CAC for a business with multiple distinct customer segments — segment first.
76
+
77
+ ## Related Skills
78
+
79
+ **WHEN to use this**
80
+
81
+ - The question is per-customer economics (CAC, LTV, payback, contribution margin).
82
+ - The decision is whether to scale a channel or pricing tier.
83
+
84
+ **WHEN NOT to use this**
85
+
86
+ - Whole-business intrinsic value with terminal value — route to [`dcf-modeling`](../dcf-modeling/SKILL.md).
87
+ - Diagnosing where conversion drops — route to [`funnel-analysis`](../funnel-analysis/SKILL.md).
88
+ - Ranking competing initiatives — route to [`rice-prioritization`](../rice-prioritization/SKILL.md).
89
+ - Setting team objectives that move these metrics — route to [`okr-tree-modeling`](../okr-tree-modeling/SKILL.md).
90
+
91
+ ## When the agent should load this
92
+
93
+ - "What's our LTV / CAC?"
94
+ - "Is this channel paying back fast enough?"
95
+ - "Compute unit economics for this pricing tier."
96
+ - "Are we unit-economic at this CAC?"
97
+ - "Cohort our payback period."
98
+
99
+ ## Output
100
+
101
+ 1. **`unit-econ-table.md`** — table per channel and blended: CAC · ARPA · gross margin · payback months · LTV · LTV/CAC. With cohort columns (last 4 quarters).
102
+ 2. **`assumptions.md`** — formula chosen (SaaS / marketplace / transactional), churn definition, COGS allocation method, lifetime cap. One bullet per choice.
103
+ 3. **`cohort-trend.md`** — trend chart (ASCII or markdown table) of CAC, payback, LTV/CAC over the last 4–8 cohorts. Annotate channel-mix shifts.
104
+ 4. **`sanity-checks.md`** — explicit cross-checks (LTV vs annual revenue, channel CAC sum vs P&L). Flag any that fail with a one-line investigation pointer.
@@ -15,6 +15,7 @@ source: package
15
15
  * Experimenting with a refactor that may be thrown away — a throwaway
16
16
  worktree is cheaper than a throwaway commit
17
17
  * A long-running build or test suite is busy in the current worktree
18
+ * `subagent-orchestration` mode 6 (`do-in-worktrees`) was selected for a cross-wing chain — this skill is the executor that creates the per-step isolated worktrees the chain expects
18
19
 
19
20
  Do NOT use when:
20
21
 
@@ -122,7 +122,7 @@ eloquent:
122
122
 
123
123
  # --- Chat history (crash recovery) ---
124
124
  #
125
- # Persistent JSONL log at .agent-chat-history (project root, git-ignored).
125
+ # Persistent JSONL log at agents/.agent-chat-history (path relative to the project root, git-ignored).
126
126
  # Keeps a durable record of the conversation so a crashed or switched
127
127
  # agent session can be resumed. See scripts/chat_history.py for the API.
128
128
  #
@@ -141,26 +141,6 @@ chat_history:
141
141
  # Overflow behavior: rotate (drop oldest) | compress (summarize)
142
142
  on_overflow: rotate
143
143
 
144
- # Heartbeat marker visibility: on | off | hybrid
145
- # on — print marker every reply (~20 tokens/reply, legacy)
146
- # off — never print (zero tokens, no drift signal)
147
- # hybrid — print only on drift (missing/foreign/returning); silent otherwise
148
- # YAML 1.1 booleanizes bare on/off — both are accepted, no quoting needed.
149
- heartbeat: hybrid
150
-
151
- # Population path: hook | checkpoint | manual
152
- # hook — platform fires lifecycle hooks; agent observes only
153
- # (Claude Code, Augment CLI, Cursor 1.7+, Cline non-Windows,
154
- # Windsurf, Gemini CLI). scripts/install.py wires hooks.
155
- # checkpoint — agent invokes /chat-history-checkpoint at phase boundaries
156
- # (Augment IDE plugin, Cursor < 1.7, Cline on Windows).
157
- # Cooperative three-gate Iron Law applies.
158
- # manual — rule is inert (cloud surfaces). Persistence is local-only.
159
- # Default `checkpoint` is the safest cooperative fallback. HOOK platforms
160
- # set this to `hook` automatically when scripts/install.py merges the
161
- # platform's settings file.
162
- path: checkpoint
163
-
164
144
  # --- Work-engine hooks ---
165
145
  #
166
146
  # Lifecycle hook surface of the `work_engine` Python engine
@@ -197,7 +177,7 @@ hooks:
197
177
  # routing drift.
198
178
  directive_set_guard: true
199
179
 
200
- # Chat-history hooks — populate .agent-chat-history structurally from
180
+ # Chat-history hooks — populate agents/.agent-chat-history structurally from
201
181
  # the engine. Gated by BOTH this block AND the global
202
182
  # chat_history.enabled above; either off → no chat-history hook
203
183
  # registers. Keep both on for the HOOK path; flip either off to fall
@@ -363,18 +343,17 @@ lives under `personal:` in YAML.
363
343
  | `project.improvement_pr_branch_prefix` | string | `improve/agent-` | Branch prefix for agent improvement PRs. |
364
344
  | `github.pr_reply_method` | `replies_endpoint`, `create_review_comment`, `auto` | `create_review_comment` | GitHub API method for replying to PR review comments. `auto` detects on first use. |
365
345
  | `eloquent.access_style` | `getters_setters`, `get_attribute`, `magic_properties` | `getters_setters` | How to access Eloquent model attributes. See `eloquent` skill for details. |
366
- | `chat_history.enabled` | `true`, `false` | `true` | Persist chat events to `.agent-chat-history` (JSONL) for crash recovery. |
346
+ | `chat_history.enabled` | `true`, `false` | `true` | Persist chat events to `agents/.agent-chat-history` (JSONL) for crash recovery. |
367
347
  | `chat_history.frequency` | `per_turn`, `per_phase`, `per_tool` | per profile | Logging granularity. Defaults: `minimal`→`per_turn`, `balanced`→`per_phase`, `full`→`per_tool`. |
368
348
  | `chat_history.max_size_kb` | integer | per profile | Max file size before overflow handling. Defaults: `minimal`→`128`, `balanced`→`256`, `full`→`512`. |
369
349
  | `chat_history.on_overflow` | `rotate`, `compress` | per profile | On overflow: `rotate` drops oldest entries; `compress` marks the file for summarization on the next turn. Defaults: `minimal`/`balanced`→`rotate`, `full`→`compress`. |
370
- | `chat_history.heartbeat` | `on`, `off`, `hybrid` | `hybrid` | Visibility of the `📒 chat-history:` marker. `on` = every reply (~20 tokens), `off` = silent, `hybrid` = print only on drift states (`missing`/`foreign`/`returning`). YAML `on`/`off` accepted bare. |
371
- | `chat_history.path` | `hook`, `checkpoint`, `manual` | `checkpoint` | Population path. `hook` = platform fires lifecycle hooks; `checkpoint` = agent invokes `/chat-history-checkpoint` at phase boundaries; `manual` = rule inert (cloud). `scripts/install.py` flips this to `hook` when the platform's hook config is deployed. See [`agents/contexts/chat-history-platform-hooks.md`](../../../agents/contexts/chat-history-platform-hooks.md). |
350
+ | `chat_history.text_limits.{user,agent,tool,phase}` | integer (chars) | `user=0`, `agent=5000`, `tool=200`, `phase=200` | Per-entry-type text-length cap. `0` = verbatim, no slice. `N > 0` = collapse whitespace, slice to N chars, append `" [+K chars]"` so the log self-reports truncation. Defaults match `DEFAULT_TEXT_LIMITS` in `scripts/chat_history.py`. |
372
351
  | `hooks.enabled` | `true`, `false` | `false` | Master switch for the work-engine hook layer. When `false` (default) the registry stays empty and golden replay is byte-stable. See [`agents/contexts/work-engine-hooks.md`](../../../agents/contexts/work-engine-hooks.md). |
373
352
  | `hooks.trace` | `true`, `false` | `false` | Emit per-event trace lines on stderr. Useful for debugging; off by default because it is noisy. |
374
353
  | `hooks.halt_surface_audit` | `true`, `false` | `true` | Defense-in-depth check that every halt surfaced by the dispatcher carries the expected shape. Cheap. |
375
354
  | `hooks.state_shape_validation` | `true`, `false` | `true` | Re-run the state schema validator on `AFTER_LOAD` and `BEFORE_SAVE`. Cheap, catches drift. |
376
355
  | `hooks.directive_set_guard` | `true`, `false` | `true` | Verify the dispatcher-resolved directive set matches the input envelope intent. Cheap, catches routing drift. |
377
- | `hooks.chat_history.enabled` | `true`, `false` | `true` | Register the four chat-history hooks (turn-check, append, halt-append, heartbeat). Gated by **both** this flag AND `chat_history.enabled`; either off → no chat-history hook registers. |
356
+ | `hooks.chat_history.enabled` | `true`, `false` | `true` | Register chat-history hooks (`append` on `after_step`, `halt_append` on `on_halt`). Gated by **both** this flag AND `chat_history.enabled`; either off → no chat-history hook registers. Schema v4: every entry self-identifies via 16-char session fingerprint, no ownership/sidecar layer. |
378
357
  | `hooks.chat_history.script` | path | `scripts/chat_history.py` | Override path to the chat-history CLI. Set only when the script lives outside the standard location. |
379
358
  | `pipelines.skill_improvement` | `true`, `false` | `true` | When `true`: propose learning capture after meaningful tasks. When `false`: silent. Included in every profile except `custom`. |
380
359
  | `roadmap.quality_cadence` | `end_of_roadmap`, `per_phase`, `per_step` | `end_of_roadmap` | When `/roadmap execute` runs the project's quality pipeline. Default skips per-step / per-phase runs and gates only the final archival. `per_phase` runs once after every phase; `per_step` is the legacy verbose mode. Step checkboxes and the dashboard are always updated regardless. `verify-before-complete` still requires fresh output before any "roadmap complete" claim. |
@@ -17,8 +17,6 @@ from .hooks import HookRegistry
17
17
  from .hooks.builtin import (
18
18
  ChatHistoryAppendHook,
19
19
  ChatHistoryHaltAppendHook,
20
- ChatHistoryHeartbeatHook,
21
- ChatHistoryTurnCheckHook,
22
20
  DecisionTraceHook,
23
21
  DirectiveSetGuardHook,
24
22
  HaltSurfaceAuditHook,
@@ -74,12 +72,16 @@ def _build_hook_registry(args: argparse.Namespace) -> HookRegistry:
74
72
  def _register_chat_history_hooks(
75
73
  registry: HookRegistry, settings: HookSettings,
76
74
  ) -> None:
77
- """Register the four chat-history hooks bound to the configured script."""
75
+ """Register the structural chat-history hooks bound to the configured script.
76
+
77
+ Hook-only contract (post road-to-chat-history-hook-only): only the
78
+ append + halt-append hooks remain; cooperative ``turn-check`` /
79
+ ``heartbeat`` hooks were removed when the cooperative always-rules
80
+ were retired.
81
+ """
78
82
  script = Path(settings.chat_history_script)
79
- ChatHistoryTurnCheckHook(script).register(registry)
80
83
  ChatHistoryAppendHook(script).register(registry)
81
84
  ChatHistoryHaltAppendHook(script).register(registry)
82
- ChatHistoryHeartbeatHook(script).register(registry)
83
85
 
84
86
 
85
87
  __all__ = ["_build_hook_registry", "_register_chat_history_hooks"]
@@ -22,8 +22,6 @@ from __future__ import annotations
22
22
  from .builtin import (
23
23
  ChatHistoryAppendHook,
24
24
  ChatHistoryHaltAppendHook,
25
- ChatHistoryHeartbeatHook,
26
- ChatHistoryTurnCheckHook,
27
25
  DecisionTraceHook,
28
26
  DirectiveSetGuardHook,
29
27
  HaltSurfaceAuditHook,
@@ -40,8 +38,6 @@ from .runner import HookRunner
40
38
  __all__ = [
41
39
  "ChatHistoryAppendHook",
42
40
  "ChatHistoryHaltAppendHook",
43
- "ChatHistoryHeartbeatHook",
44
- "ChatHistoryTurnCheckHook",
45
41
  "DecisionTraceHook",
46
42
  "DirectiveSetGuardHook",
47
43
  "HaltSurfaceAuditHook",
@@ -13,8 +13,6 @@ from __future__ import annotations
13
13
 
14
14
  from .chat_history_append import ChatHistoryAppendHook
15
15
  from .chat_history_halt_append import ChatHistoryHaltAppendHook
16
- from .chat_history_heartbeat import ChatHistoryHeartbeatHook
17
- from .chat_history_turn_check import ChatHistoryTurnCheckHook
18
16
  from .decision_trace import DecisionTraceHook
19
17
  from .directive_set_guard import DirectiveSetGuardHook
20
18
  from .halt_surface_audit import HaltSurfaceAuditHook
@@ -25,8 +23,6 @@ from .trace import TraceHook
25
23
  __all__ = [
26
24
  "ChatHistoryAppendHook",
27
25
  "ChatHistoryHaltAppendHook",
28
- "ChatHistoryHeartbeatHook",
29
- "ChatHistoryTurnCheckHook",
30
26
  "DecisionTraceHook",
31
27
  "DirectiveSetGuardHook",
32
28
  "HaltSurfaceAuditHook",
@@ -12,9 +12,6 @@ import sys
12
12
  from pathlib import Path
13
13
  from typing import Callable, Sequence
14
14
 
15
- from ..context import HookContext
16
- from ..exceptions import HookError
17
-
18
15
  ProcessRunner = Callable[[Sequence[str]], "subprocess.CompletedProcess[str]"]
19
16
  """Callable that runs a subprocess. Production default: ``_default_runner``."""
20
17
 
@@ -28,65 +25,24 @@ def _default_runner(cmd: Sequence[str]) -> "subprocess.CompletedProcess[str]":
28
25
  return subprocess.run(list(cmd), capture_output=True, text=True, check=False)
29
26
 
30
27
 
31
- def _derive_first_user_msg(ctx: HookContext) -> str | None:
32
- """Pull a stable first-user-msg out of the available context.
28
+ class _ChatHistoryHookBase:
29
+ """Shared plumbing — script path and runner.
33
30
 
34
- CLI-layer events carry ``ctx.work`` (the v1 envelope); dispatcher-layer
35
- events (``before_step`` / ``after_step`` / ``on_halt``) carry only
36
- ``ctx.delivery`` (the legacy :class:`DeliveryState`). Both shapes feed
37
- the same ``id: title`` / ``raw`` derivation so chat-history entries
38
- stay stable across the lifecycle. Returns ``None`` when the shape is
39
- unknown — callers raise ``HookError`` so the runner converts it to
40
- a warning.
31
+ Schema v4 derives session attribution from the platform ``session_id``
32
+ (passed by the platform-hook dispatcher), not from a derived
33
+ first-user-msg. work-engine internal hooks have no platform session
34
+ in scope, so they omit ``--session-id`` and entries land in the
35
+ ``<unknown>`` session bucket.
41
36
  """
42
- work = ctx.work
43
- if work is not None and getattr(work, "input", None) is not None:
44
- inp = work.input
45
- data = getattr(inp, "data", None) or {}
46
- kind = getattr(inp, "kind", None)
47
- if kind == "prompt":
48
- raw = data.get("raw")
49
- if raw:
50
- return str(raw)
51
- elif kind == "ticket":
52
- joined = _ticket_msg(data)
53
- if joined:
54
- return joined
55
-
56
- delivery = ctx.delivery
57
- if delivery is not None:
58
- ticket = getattr(delivery, "ticket", None) or {}
59
- joined = _ticket_msg(ticket)
60
- if joined:
61
- return joined
62
- return None
63
-
64
-
65
- def _ticket_msg(ticket: dict) -> str:
66
- ticket_id = ticket.get("id") or ""
67
- title = ticket.get("title") or ""
68
- return f"{ticket_id}: {title}".strip(": ").strip()
69
-
70
-
71
- class _ChatHistoryHookBase:
72
- """Shared plumbing — script path, runner, and first-msg derivation."""
73
37
 
74
38
  def __init__(
75
39
  self,
76
40
  script_path: Path,
77
41
  *,
78
42
  runner: ProcessRunner | None = None,
79
- first_user_msg: str | None = None,
80
43
  ) -> None:
81
44
  self.script_path = Path(script_path)
82
45
  self._runner = runner or _default_runner
83
- self._fixed_msg = first_user_msg
84
-
85
- def _resolve_msg(self, ctx: HookContext) -> str:
86
- msg = self._fixed_msg or _derive_first_user_msg(ctx)
87
- if not msg:
88
- raise HookError("chat-history hook: cannot derive first-user-msg")
89
- return msg
90
46
 
91
47
  def _invoke(self, *args: str) -> "subprocess.CompletedProcess[str]":
92
48
  cmd = [sys.executable, str(self.script_path), *args]
@@ -29,10 +29,9 @@ class ChatHistoryAppendHook(_ChatHistoryHookBase):
29
29
  result = ctx.result
30
30
  if result is None or getattr(result, "outcome", None) != Outcome.SUCCESS:
31
31
  return
32
- msg = self._resolve_msg(ctx)
33
32
  payload: dict[str, Any] = {"step": ctx.step_name or "<unknown>"}
34
33
  proc = self._invoke(
35
- "append", "--first-user-msg", msg,
34
+ "append",
36
35
  "--type", "phase", "--json", json.dumps(payload),
37
36
  )
38
37
  if proc.returncode != EXIT_OK:
@@ -22,7 +22,6 @@ class ChatHistoryHaltAppendHook(_ChatHistoryHookBase):
22
22
  registry.register(HookEvent.ON_HALT, self._on_halt)
23
23
 
24
24
  def _on_halt(self, ctx: HookContext) -> None:
25
- msg = self._resolve_msg(ctx)
26
25
  questions: list[str] = []
27
26
  if ctx.result is not None:
28
27
  questions = list(getattr(ctx.result, "questions", []) or [])
@@ -30,7 +29,7 @@ class ChatHistoryHaltAppendHook(_ChatHistoryHookBase):
30
29
  questions = list(getattr(ctx.delivery, "questions", []) or [])
31
30
  payload = {"step": ctx.step_name or "<unknown>", "questions": questions}
32
31
  proc = self._invoke(
33
- "append", "--first-user-msg", msg,
32
+ "append",
34
33
  "--type", "decision", "--json", json.dumps(payload),
35
34
  )
36
35
  if proc.returncode != EXIT_OK:
@@ -4,10 +4,9 @@ Implements the producer side of
4
4
  ``docs/contracts/memory-visibility-v1.md``: derive ``asks/hits/ids``
5
5
  from ``state.memory`` and thread the rendered line into
6
6
  ``state.report`` so the agent's reply naturally carries the memory
7
- heartbeat.
7
+ visibility marker.
8
8
 
9
- Fires on ``before_save`` for the same reason as
10
- ``ChatHistoryHeartbeatHook``: ``cli._sync_back`` runs between
9
+ Fires on ``before_save``: ``cli._sync_back`` runs between
11
10
  ``after_dispatch`` and ``before_save`` and reassigns
12
11
  ``work.report = delivery.report``. A line written on
13
12
  ``after_dispatch`` would be overwritten before ``_save``; firing on