npm - @event4u/agent-config - Versions diffs - 1.19.0 → 1.20.0 - Mend

@event4u/agent-config 1.19.0 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88) hide show

package/.agent-src/commands/agent-handoff.md +14 -10
package/.agent-src/commands/chat-history/import.md +170 -0
package/.agent-src/commands/chat-history/learn.md +178 -0
package/.agent-src/commands/chat-history/show.md +17 -18
package/.agent-src/commands/chat-history.md +26 -25
package/.agent-src/commands/council/default.md +4 -7
package/.agent-src/commands/create-pr.md +28 -8
package/.agent-src/commands/sync-gitignore.md +1 -1
package/.agent-src/contexts/communication/rules-auto/skill-quality-mechanics.md +76 -0
package/.agent-src/contexts/communication/rules-auto/slash-command-routing-policy-mechanics.md +3 -3
package/.agent-src/contexts/communication/rules-auto/user-interaction-mechanics.md +5 -12
package/.agent-src/rules/direct-answers.md +10 -2
package/.agent-src/rules/language-and-tone.md +37 -6
package/.agent-src/rules/no-attribution-footers.md +48 -0
package/.agent-src/rules/no-roadmap-references.md +1 -1
package/.agent-src/rules/skill-quality.md +49 -0
package/.agent-src/rules/user-interaction.md +21 -5
package/.agent-src/skills/ai-council/SKILL.md +4 -5
package/.agent-src/skills/dcf-modeling/SKILL.md +89 -0
package/.agent-src/skills/funnel-analysis/SKILL.md +100 -0
package/.agent-src/skills/md-language-check/SKILL.md +1 -1
package/.agent-src/skills/okr-tree-modeling/SKILL.md +93 -0
package/.agent-src/skills/rice-prioritization/SKILL.md +100 -0
package/.agent-src/skills/subagent-orchestration/SKILL.md +34 -2
package/.agent-src/skills/unit-economics-modeling/SKILL.md +104 -0
package/.agent-src/skills/using-git-worktrees/SKILL.md +1 -0
package/.agent-src/templates/agent-settings.md +5 -26
package/.agent-src/templates/scripts/work_engine/hook_bootstrap.py +7 -5
package/.agent-src/templates/scripts/work_engine/hooks/__init__.py +0 -4
package/.agent-src/templates/scripts/work_engine/hooks/builtin/__init__.py +0 -4
package/.agent-src/templates/scripts/work_engine/hooks/builtin/_chat_history_base.py +7 -51
package/.agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_append.py +1 -2
package/.agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_halt_append.py +1 -2
package/.agent-src/templates/scripts/work_engine/hooks/builtin/memory_visibility.py +2 -3
package/.agent-src/templates/skill.md +30 -1
package/.claude-plugin/marketplace.json +8 -4
package/AGENTS.md +44 -3
package/CHANGELOG.md +111 -0
package/README.md +6 -6
package/config/agent-settings.template.yml +19 -13
package/config/gitignore-block.txt +4 -4
package/docs/architecture.md +3 -3
package/docs/catalog.md +14 -12
package/docs/contracts/adr-chat-history-split.md +10 -1
package/docs/contracts/command-clusters.md +1 -1
package/docs/contracts/cross-wing-handoff.md +133 -0
package/docs/contracts/file-ownership-matrix.json +341 -126
package/docs/contracts/hook-architecture-v1.md +8 -1
package/docs/contracts/memory-visibility-v1.md +8 -24
package/docs/customization.md +1 -1
package/docs/getting-started.md +21 -29
package/docs/guidelines/agent-infra/ask-when-uncertain-demos.md +1 -1
package/docs/hook-payload-capture.md +221 -0
package/docs/migrations/commands-1.15.0.md +17 -12
package/docs/skills-catalog.md +5 -4
package/llms.txt +4 -3
package/package.json +1 -1
package/scripts/agent-config +1 -1
package/scripts/ai_council/_default_prices.py +4 -4
package/scripts/ai_council/clients.py +1 -1
package/scripts/ai_council/modes.py +3 -4
package/scripts/ai_council/pricing.py +10 -9
package/scripts/build_rule_trigger_matrix.py +1 -9
package/scripts/chat_history.py +952 -596
package/scripts/check_references.py +12 -2
package/scripts/council_cli.py +54 -4
package/scripts/hook_manifest.yaml +33 -0
package/scripts/hooks/augment-chat-history.sh +10 -0
package/scripts/hooks/cowork-dispatcher.sh +98 -0
package/scripts/hooks/dispatch_hook.py +35 -0
package/scripts/hooks_status.py +12 -1
package/scripts/install-hooks.sh +2 -2
package/scripts/install.sh +37 -0
package/scripts/lint_handoffs.py +214 -0
package/scripts/lint_hook_manifest.py +2 -1
package/scripts/redact_hook_capture.py +148 -0
package/scripts/schemas/skill.schema.json +5 -0
package/scripts/skill_linter.py +163 -1
package/scripts/update_prices.py +3 -3
package/.agent-src/commands/chat-history/checkpoint.md +0 -126
package/.agent-src/commands/chat-history/clear.md +0 -103
package/.agent-src/commands/chat-history/resume.md +0 -183
package/.agent-src/rules/chat-history-cadence.md +0 -143
package/.agent-src/rules/chat-history-ownership.md +0 -124
package/.agent-src/rules/chat-history-visibility.md +0 -97
package/.agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_heartbeat.py +0 -50
package/.agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_turn_check.py +0 -49
package/scripts/check_phase_coupling.py +0 -148

package/.agent-src/skills/okr-tree-modeling/SKILL.md ADDED Viewed

@@ -0,0 +1,93 @@
+---
+name: okr-tree-modeling
+description: "Use when decomposing a company objective into team OKRs, auditing a draft OKR tree, or stress-testing an existing one for measurability and laddering."
+status: active
+tier: senior
+source: package
+---
+# okr-tree-modeling
+## When to use
+- A leadership team wrote a quarterly objective and needs three measurable KRs that actually move it.
+- A draft OKR tree needs review for orphan KRs, vanity metrics, or KRs that ladder to two parents.
+- A team-level OKR set needs to be checked against the company objective it claims to serve.
+Do NOT use for ranking competing initiatives within one OKR — that's prioritization, not decomposition (route elsewhere — see Related Skills).
+## Procedure
+### Step 0: Inspect
+1. Identify the cognition cluster. OKR-as-strategy lives near the CEO/COO; OKR-as-PM-tool lives near the head of product. The decomposition mechanic is the same; the ladder-up target differs.
+2. Confirm timeframe (quarter / half / year) — KR cadence depends on it.
+### Step 1: Lock the parent objective
+1. Restate the parent in one sentence with **a verb of change** (grow, reduce, ship, win) and an outcome — not an output.
+2. Bad: "Improve onboarding." Good: "Reduce time-to-first-value for new accounts to under 7 days."
+3. If the parent has no verb of change or no outcome, the tree is built on sand. Stop and rewrite the parent.
+### Step 2: Decompose into 3 KRs
+1. Each KR must satisfy four tests: (a) measurable end-state, (b) the team owns the lever, (c) achievable iff the parent is achieved, (d) failing it should be visibly bad.
+2. Three is the floor and the ceiling. One KR makes the objective brittle; five dilutes ownership.
+3. Anti-pattern: "ship X feature" as a KR. Shipping is an output. The KR is what changes when the feature lands.
+### Step 3: Cascade to team-level
+1. For each company KR, define 2–3 team-level KRs that, taken together, achieve the parent KR — not duplicate it.
+2. Anti-pattern: copy-paste the parent KR with a smaller number ("60% of company KR is 30% for our team"). Real cascades change shape — input metrics for some teams, leading indicators for others.
+3. Mark each team KR as **trailing** (lagging outcome — revenue, retention) or **leading** (input the team controls — activation rate, response time). A tree that is all-trailing is unactionable.
+### Step 4: Cadence + check-ins
+1. Weekly check-in: leading KRs only. Trailing KRs are reviewed monthly.
+2. Confidence scoring: record a confidence score (0.0–1.0) per KR three times across the period (start / mid / end) so drift surfaces before the period closes.
+### Step 5: Validate
+1. Walk the tree top-down: does every leaf KR ladder cleanly to exactly one parent KR?
+2. Walk it bottom-up: if every team hits 0.7 confidence, does the company objective land?
+3. Count vanity metrics — KRs that move but don't matter. If more than zero, rewrite.
+## Gotcha
+- "100% of teams adopt X" is a participation metric, not an outcome KR. The model loves to write these.
+- A KR owned by two teams is owned by no team. Single accountability is non-negotiable.
+- Stretch goals that nobody believes are achievable produce sandbagging in the following OKR cycle, not stretch.
+- "Customer happiness" + NPS is a sentiment proxy, not an outcome metric. Tie KRs to behavior (renewal, expansion, usage), not feeling.
+## Do NOT
+- Do NOT write more than 3 KRs per objective — focus is the entire point.
+- Do NOT use "achieve $X revenue" as a KR for a team that doesn't own pricing or pipeline; that sets them up to fail and to take the blame for a lever they never controlled.
+- Do NOT cascade by simple percentage allocation — different teams contribute different mechanisms, not different fractions.
+## Related Skills
+**WHEN to use this**
+- The ask is decomposition of an objective into measurable KRs.
+- A draft OKR tree needs structural review (cascade integrity, leading/trailing balance).
+**WHEN NOT to use this**
+- Prioritization of competing features inside one KR — route to [`rice-prioritization`](../rice-prioritization/SKILL.md).
+- Conversion-rate diagnosis on a funnel KR — route to [`funnel-analysis`](../funnel-analysis/SKILL.md).
+- Valuation-impact modeling of strategic objectives — route to [`dcf-modeling`](../dcf-modeling/SKILL.md).
+## When the agent should load this
+- "Help me write OKRs for next quarter."
+- "Review this OKR tree — does it ladder up?"
+- "What KRs would actually move our retention objective?"
+- "Are these team OKRs measurable?"
+- "We have 8 KRs per team — too many?"
+## Output
+1. **`okr-tree.md`** — markdown tree: company objective → 3 company KRs → 2–3 team KRs each. Each leaf carries owner, measure, target, trailing/leading tag, check-in cadence.
+2. **`cascade-audit.md`** — orphans (KRs with no parent), oversubscribed parents (KR with >3 children), vanity metrics flagged with a one-line replacement suggestion.
+3. **`confidence-template.md`** — empty 3-column table (start / mid / end) per KR, ready for the period's confidence updates.

package/.agent-src/skills/rice-prioritization/SKILL.md ADDED Viewed

@@ -0,0 +1,100 @@
+---
+name: rice-prioritization
+description: "Use when ranking competing initiatives for a roadmap, breaking a tie between two features, or auditing a backlog for hidden low-value work via Reach × Impact × Confidence ÷ Effort."
+status: active
+tier: senior
+source: package
+---
+# rice-prioritization
+## When to use
+- A backlog has more candidates than capacity for the next quarter and someone has to pick.
+- A PM and an engineering lead disagree on what ships first and need a shared framework.
+- A draft roadmap reads like a wish list — no transparency on **why** these and not those.
+Do NOT use for valuation, OKR decomposition, or funnel-stage diagnosis (see Related Skills).
+## Procedure
+### Step 0: Inspect
+1. Confirm there are at least 5 candidates. RICE on 2 items is theatre; just argue the merits.
+2. Confirm there is a shared definition of the **target user** for "Reach" — RICE breaks if two scorers count different populations.
+### Step 1: Score Reach
+1. Reach = number of users / events / requests **per fixed time window** (per quarter is the default).
+2. Use absolute counts pulled from analytics or product DB, not percentages — percentages hide tiny denominators.
+3. If the data isn't there, write the query you'd run and say so. Do not invent numbers.
+### Step 2: Score Impact
+1. Use the canonical 5-point scale: 0.25 (minimal) · 0.5 (low) · 1 (medium) · 2 (high) · 3 (massive).
+2. Anchor each level with a concrete past shipped feature ("medium = like the search filter we shipped Q2"). Without anchors, scorers drift.
+3. Impact is **per affected user**, not aggregate. Aggregate is what RICE produces, not what you input.
+### Step 3: Score Confidence
+1. Confidence is a **percentage** — 100 / 80 / 50 / "low and we should not score this yet".
+2. Anything below 50 means: stop, do a spike or a research week, then re-score. RICE does not rescue ignorance.
+3. Confidence multiplies — it is the model's discount for unknown unknowns.
+### Step 4: Score Effort
+1. Effort = person-months for the smallest viable shippable slice. Not the fantasy version.
+2. Engineering owns this number. PMs scoring effort is the most common process failure.
+3. Effort < 0.5 person-months almost always means scope is underestimated — surface and ask.
+### Step 5: Compute and rank
+1. RICE = `(Reach × Impact × Confidence) / Effort`.
+2. Rank descending. The score is the artefact, not the answer — read the top 5 with a critical eye.
+3. Anti-pattern: treating RICE rank as a contract. It is a structured argument, not a verdict.
+### Step 6: Audit the bottom
+1. Look at the bottom quartile. If a strategic must-have lives there, the model has a calibration error — usually Reach or Impact.
+2. Look at the top item. If it is obviously absurd (e.g. one ad-hoc admin tool above a strategic platform play), the input scoring is uncalibrated.
+## Gotcha
+- Reach in percentages hides "this feature affects 100% of … 12 users."
+- Impact inflation: every PM thinks every feature is a 2 or 3. Force at least 30% of items to score 0.5 or below.
+- Confidence is the only multiplier that punishes uncertainty — do not let it default to 80 for everything.
+- Effort discrepancy between PM and engineering on the same row is itself the signal — investigate, do not average.
+## Do NOT
+- Do NOT rank fewer than 5 candidates with RICE — overhead exceeds value.
+- Do NOT mix strategic bets and BAU tickets in the same RICE table; their effort scales differ by 10×.
+- Do NOT ship a roadmap that is exactly the RICE-sorted top-N — you need at least one strategic outlier with a written rationale.
+## Related Skills
+**WHEN to use this**
+- Ranking is the actual question.
+- The team needs a shared, auditable scoring frame.
+**WHEN NOT to use this**
+- Decomposing an objective into KRs — route to [`okr-tree-modeling`](../okr-tree-modeling/SKILL.md).
+- Diagnosing why a funnel stage drops — route to [`funnel-analysis`](../funnel-analysis/SKILL.md).
+- Modelling whether an investment is worth its capital cost — route to [`dcf-modeling`](../dcf-modeling/SKILL.md).
+- CAC / LTV / payback questions — route to [`unit-economics-modeling`](../unit-economics-modeling/SKILL.md).
+## When the agent should load this
+- "Help me prioritize the backlog for Q3."
+- "RICE-score these features."
+- "Why is X above Y on the roadmap?"
+- "We have 30 ideas and 6 engineers — what ships?"
+- "Audit our roadmap for low-value work."
+## Output
+1. **`rice-table.md`** — markdown table: Item · Reach · Impact · Confidence · Effort · RICE · Owner · Notes. Sorted descending by RICE.
+2. **`calibration-notes.md`** — one paragraph per anchor (what "Impact = 2" means with a named past feature) plus a list of items with confidence < 50 marked for spike-first.
+3. **`top-5-critique.md`** — one paragraph per top-5 item: is the rank defensible, and what would change it.

package/.agent-src/skills/subagent-orchestration/SKILL.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: subagent-orchestration
-description: "Use when orchestrating implementer/judge subagents — five modes (do-and-judge, do-in-steps, do-in-parallel, do-competitively, judge-with-debate) — models from .agent-settings.yml."
+description: "Use when orchestrating implementer/judge subagents — six modes (do-and-judge, do-in-steps, do-in-parallel, do-competitively, judge-with-debate, do-in-worktrees) — models from .agent-settings.yml."
 source: package
 ---
@@ -44,7 +44,7 @@ judge is a fresh pair of eyes. If `.agent-settings.yml` resolves to
 identical implementer and judge models, surface the mismatch before
 running — do not silently continue.
-## The five modes
+## The six modes
 Each mode has a decision row: when to use, when not, and the expected
 model pairing. Defaults come from
@@ -100,6 +100,38 @@ migration, public API) where a single judge is too easy to fool.
 |---|---|---|
 | Security, data integrity, public API change | Routine internal refactor | judges = same tier (2x); meta-judge = one tier up |
+### 6. do-in-worktrees
+Cross-wing or cross-skill chain executed across isolated git
+worktrees — each handoff in the chain runs in its own worktree, so
+the workspace state of one step never leaks into the next. Operationalizes
+the worktree boundary clause in
+[`docs/contracts/cross-wing-handoff.md`](../../../docs/contracts/cross-wing-handoff.md)
+§ 3. State-machine layer only — worktree creation/destruction lives
+in [`using-git-worktrees`](../using-git-worktrees/SKILL.md) and
+[`finishing-a-development-branch`](../finishing-a-development-branch/SKILL.md).
+| When to use | When not | Model pairing |
+|---|---|---|
+| Multi-step cross-wing chain (≥2 senior skills, each ≥30 min) where one step's open files / branch state would confuse the next | Fast iteration where each step < 30 min — worktree overhead exceeds isolation benefit | implementers = same tier per step; judge = one tier up at chain end |
+**Handoff shape:** initiator-skill emits the typed output declared in
+its `## Output` block → control passes to delegated-skill in a fresh
+worktree → delegated-skill consumes the input shape declared in its
+`## Input` (or `## When the agent should load this`) block. The
+handoff is auditable; `lint_handoffs.py` validates the chain.
+**Example chain (W3 launch):** `positioning` (worktree A) →
+`messaging-architecture` (worktree B, consumes positioning's
+`positioning-statement.md`) → `gtm-launch` (worktree C, consumes
+both prior artifacts). Each worktree carries one branch; the chain
+end produces a single integration PR.
+**Anti-pattern:** do not use for fast iteration loops where each
+step is under ~30 minutes. The branch-creation, context-switch, and
+worktree-cleanup cost dominates. Stick with mode 1 (do-and-judge)
+or mode 2 (do-in-steps) for those.
 ## Procedure
 ### 1. Inspect the task shape

package/.agent-src/skills/unit-economics-modeling/SKILL.md ADDED Viewed

@@ -0,0 +1,104 @@
+---
+name: unit-economics-modeling
+description: "Use when modeling CAC, LTV, gross-margin payback, or contribution margin per customer — for SaaS, marketplace, or transactional businesses."
+status: active
+tier: senior
+source: package
+---
+# unit-economics-modeling
+## When to use
+- A board ask: "is this business unit-economic?" — needs CAC / LTV / payback, not vibes.
+- A new channel is scaling and the question is whether the CAC payback period is sustainable.
+- A pricing or packaging change needs to be tested against contribution margin per cohort.
+Do NOT use for full-business intrinsic-value modeling, OKR setting, funnel-stage diagnosis, or backlog ranking (see Related Skills).
+## Procedure
+### Step 0: Inspect
+1. Confirm the business shape — SaaS / marketplace / transactional. The three canonical cases differ in **revenue recognition** and **churn definition**, not in arithmetic.
+2. Confirm a fully-loaded CAC is computable: paid spend + sales comp + content/SEO allocation + tooling. Marketing-spend-only CAC is a vanity metric.
+### Step 1: Compute CAC per channel
+1. CAC = `(fully-loaded acquisition spend in window) / (new paying customers acquired in same window)`. Match window to sales-cycle length, not calendar quarter.
+2. Compute by channel **and** blended. Blended-only hides the channel that is breaking the average.
+3. Anti-pattern: counting trial signups as customers. Customer = first paid charge cleared.
+### Step 2: Compute gross margin
+1. Gross margin = `(revenue − COGS) / revenue`. COGS includes hosting, payment fees, third-party APIs the customer's usage drives, and direct customer-success cost.
+2. Gross margin must be **per dollar of revenue**, not per customer. Per-customer gross margin is contribution margin (Step 3).
+3. SaaS healthy band: 70–85%. Marketplace: 15–40%. Transactional: 5–25%. Outside these — the business is mislabelled or the COGS allocation is wrong.
+### Step 3: Compute LTV
+1. Pick the canonical formula for the case:
+   - **SaaS:** `LTV = ARPA × gross_margin / monthly_churn_rate`. Use net-dollar churn for self-serve, gross logo churn for high-touch.
+   - **Marketplace:** `LTV = take_rate × GMV_per_user × retention_curve_AUC` over 24 months. Steady-state extrapolation is dishonest below 24 months of cohort data.
+   - **Transactional:** `LTV = avg_order_value × gross_margin × purchases_per_year × avg_lifetime_years`.
+2. Cap implied lifetime at 5 years for any business with < 3 years of cohort history. Anything longer is a fairy tale.
+3. State the formula used inline. Do not let the reader infer.
+### Step 4: Compute payback and ratio
+1. **CAC payback** (months) = `CAC / (ARPA × gross_margin)` for SaaS; analogue for marketplace and transactional. Healthy SaaS: ≤ 12 months.
+2. **LTV / CAC ratio**: target ≥ 3.0. Below 1.5 is acquisition-loss territory; above 5.0 means under-investment in growth (or bad LTV math).
+3. Both numbers, not one. Payback drives capital efficiency; ratio drives long-run economics.
+### Step 5: Cohort the answer
+1. Run Steps 1–4 by signup-quarter cohort. Trends matter more than the point estimate.
+2. If LTV/CAC is improving but payback is lengthening, you are buying retention with discounting — flag.
+3. If both deteriorate, the channel mix has shifted to a worse channel — segment by channel to find the leak.
+### Step 6: Validate
+1. Sanity-check LTV against revenue retention. If implied LTV > 8× annual revenue per customer with monthly churn > 2%, the math is wrong.
+2. Sanity-check CAC against fully-loaded P&L. If the acquisition spend attributed to channels (CAC × new customers, summed across channels) comes to less than total acquisition spend, allocations are missing.
+## Gotcha
+- Marketing-spend-only CAC is the most common deception. Sales comp, BDR salaries, content production, and tooling all belong in fully-loaded CAC.
+- Net-dollar retention > 100% does not justify ignoring logo churn — they answer different questions.
+- ARPA averaged across plan tiers hides churn concentrated in one tier. Compute per tier when tiers differ in price by more than 2×.
+- Payback period using contribution margin (post variable-cost) is honest; payback using gross revenue is the kind of math VCs see in pitch decks and discount on sight.
+## Do NOT
+- Do NOT extrapolate LTV beyond observable cohort data without saying so explicitly.
+- Do NOT mix freemium activation rates with paid CAC; they live in different universes.
+- Do NOT report a single LTV/CAC for a business with multiple distinct customer segments — segment first.
+## Related Skills
+**WHEN to use this**
+- The question is per-customer economics (CAC, LTV, payback, contribution margin).
+- The decision is whether to scale a channel or pricing tier.
+**WHEN NOT to use this**
+- Whole-business intrinsic value with terminal value — route to [`dcf-modeling`](../dcf-modeling/SKILL.md).
+- Diagnosing where conversion drops — route to [`funnel-analysis`](../funnel-analysis/SKILL.md).
+- Ranking competing initiatives — route to [`rice-prioritization`](../rice-prioritization/SKILL.md).
+- Setting team objectives that move these metrics — route to [`okr-tree-modeling`](../okr-tree-modeling/SKILL.md).
+## When the agent should load this
+- "What's our LTV / CAC?"
+- "Is this channel paying back fast enough?"
+- "Compute unit economics for this pricing tier."
+- "Are we unit-economic at this CAC?"
+- "Cohort our payback period."
+## Output
+1. **`unit-econ-table.md`** — table per channel and blended: CAC · ARPA · gross margin · payback months · LTV · LTV/CAC. With cohort columns (last 4 quarters).
+2. **`assumptions.md`** — formula chosen (SaaS / marketplace / transactional), churn definition, COGS allocation method, lifetime cap. One bullet per choice.
+3. **`cohort-trend.md`** — trend chart (ASCII or markdown table) of CAC, payback, LTV/CAC over the last 4–8 cohorts. Annotate channel-mix shifts.
+4. **`sanity-checks.md`** — explicit cross-checks (LTV vs annual revenue, channel CAC sum vs P&L). Flag any that fail with a one-line investigation pointer.

package/.agent-src/skills/using-git-worktrees/SKILL.md CHANGED Viewed

@@ -15,6 +15,7 @@ source: package
 * Experimenting with a refactor that may be thrown away — a throwaway
   worktree is cheaper than a throwaway commit
 * A long-running build or test suite is busy in the current worktree
+* `subagent-orchestration` mode 6 (`do-in-worktrees`) was selected for a cross-wing chain — this skill is the executor that creates the per-step isolated worktrees the chain expects
 Do NOT use when:

package/.agent-src/templates/agent-settings.md CHANGED Viewed

@@ -122,7 +122,7 @@ eloquent:
 # --- Chat history (crash recovery) ---
 #
-# Persistent JSONL log at .agent-chat-history (project root, git-ignored).
+# Persistent JSONL log at agents/.agent-chat-history (under agents/ in the project root, git-ignored).
 # Keeps a durable record of the conversation so a crashed or switched
 # agent session can be resumed. See scripts/chat_history.py for the API.
 #
@@ -141,26 +141,6 @@ chat_history:
   # Overflow behavior: rotate (drop oldest) | compress (summarize)
   on_overflow: rotate
-  # Heartbeat marker visibility: on | off | hybrid
-  #   on     — print marker every reply (~20 tokens/reply, legacy)
-  #   off    — never print (zero tokens, no drift signal)
-  #   hybrid — print only on drift (missing/foreign/returning); silent otherwise
-  # YAML 1.1 booleanizes bare on/off — both are accepted, no quoting needed.
-  heartbeat: hybrid
-  # Population path: hook | checkpoint | manual
-  #   hook       — platform fires lifecycle hooks; agent observes only
-  #                (Claude Code, Augment CLI, Cursor 1.7+, Cline non-Windows,
-  #                 Windsurf, Gemini CLI). scripts/install.py wires hooks.
-  #   checkpoint — agent invokes /chat-history-checkpoint at phase boundaries
-  #                (Augment IDE plugin, Cursor < 1.7, Cline on Windows).
-  #                Cooperative three-gate Iron Law applies.
-  #   manual     — rule is inert (cloud surfaces). Persistence is local-only.
-  # Default `checkpoint` is the safest cooperative fallback. HOOK platforms
-  # set this to `hook` automatically when scripts/install.py merges the
-  # platform's settings file.
-  path: checkpoint
 # --- Work-engine hooks ---
 #
 # Lifecycle hook surface of the `work_engine` Python engine
@@ -197,7 +177,7 @@ hooks:
   # routing drift.
   directive_set_guard: true
-  # Chat-history hooks — populate .agent-chat-history structurally from
+  # Chat-history hooks — populate agents/.agent-chat-history structurally from
   # the engine. Gated by BOTH this block AND the global
   # chat_history.enabled above; either off → no chat-history hook
   # registers. Keep both on for the HOOK path; flip either off to fall
@@ -363,18 +343,17 @@ lives under `personal:` in YAML.
 | `project.improvement_pr_branch_prefix` | string | `improve/agent-` | Branch prefix for agent improvement PRs. |
 | `github.pr_reply_method` | `replies_endpoint`, `create_review_comment`, `auto` | `create_review_comment` | GitHub API method for replying to PR review comments. `auto` detects on first use. |
 | `eloquent.access_style` | `getters_setters`, `get_attribute`, `magic_properties` | `getters_setters` | How to access Eloquent model attributes. See `eloquent` skill for details. |
-| `chat_history.enabled` | `true`, `false` | `true` | Persist chat events to `.agent-chat-history` (JSONL) for crash recovery. |
+| `chat_history.enabled` | `true`, `false` | `true` | Persist chat events to `agents/.agent-chat-history` (JSONL) for crash recovery. |
 | `chat_history.frequency` | `per_turn`, `per_phase`, `per_tool` | per profile | Logging granularity. Defaults: `minimal`→`per_turn`, `balanced`→`per_phase`, `full`→`per_tool`. |
 | `chat_history.max_size_kb` | integer | per profile | Max file size before overflow handling. Defaults: `minimal`→`128`, `balanced`→`256`, `full`→`512`. |
 | `chat_history.on_overflow` | `rotate`, `compress` | per profile | On overflow: `rotate` drops oldest entries; `compress` marks the file for summarization on the next turn. Defaults: `minimal`/`balanced`→`rotate`, `full`→`compress`. |
-| `chat_history.heartbeat` | `on`, `off`, `hybrid` | `hybrid` | Visibility of the `📒 chat-history:` marker. `on` = every reply (~20 tokens), `off` = silent, `hybrid` = print only on drift states (`missing`/`foreign`/`returning`). YAML `on`/`off` accepted bare. |
-| `chat_history.path` | `hook`, `checkpoint`, `manual` | `checkpoint` | Population path. `hook` = platform fires lifecycle hooks; `checkpoint` = agent invokes `/chat-history-checkpoint` at phase boundaries; `manual` = rule inert (cloud). `scripts/install.py` flips this to `hook` when the platform's hook config is deployed. See [`agents/contexts/chat-history-platform-hooks.md`](../../../agents/contexts/chat-history-platform-hooks.md). |
+| `chat_history.text_limits.{user,agent,tool,phase}` | integer (chars) | `user=0`, `agent=5000`, `tool=200`, `phase=200` | Per-entry-type text-length cap. `0` = verbatim, no slice. `N > 0` = collapse whitespace, slice to N chars, append `" … [+K chars]"` so the log self-reports truncation. Defaults match `DEFAULT_TEXT_LIMITS` in `scripts/chat_history.py`. |
 | `hooks.enabled` | `true`, `false` | `false` | Master switch for the work-engine hook layer. When `false` (default) the registry stays empty and golden replay is byte-stable. See [`agents/contexts/work-engine-hooks.md`](../../../agents/contexts/work-engine-hooks.md). |
 | `hooks.trace` | `true`, `false` | `false` | Emit per-event trace lines on stderr. Useful for debugging; off by default because it is noisy. |
 | `hooks.halt_surface_audit` | `true`, `false` | `true` | Defense-in-depth check that every halt surfaced by the dispatcher carries the expected shape. Cheap. |
 | `hooks.state_shape_validation` | `true`, `false` | `true` | Re-run the state schema validator on `AFTER_LOAD` and `BEFORE_SAVE`. Cheap, catches drift. |
 | `hooks.directive_set_guard` | `true`, `false` | `true` | Verify the dispatcher-resolved directive set matches the input envelope intent. Cheap, catches routing drift. |
-| `hooks.chat_history.enabled` | `true`, `false` | `true` | Register the four chat-history hooks (turn-check, append, halt-append, heartbeat). Gated by **both** this flag AND `chat_history.enabled`; either off → no chat-history hook registers. |
+| `hooks.chat_history.enabled` | `true`, `false` | `true` | Register chat-history hooks (`append` on `after_step`, `halt_append` on `on_halt`). Gated by **both** this flag AND `chat_history.enabled`; either off → no chat-history hook registers. Schema v4: every entry self-identifies via 16-char session fingerprint, no ownership/sidecar layer. |
 | `hooks.chat_history.script` | path | `scripts/chat_history.py` | Override path to the chat-history CLI. Set only when the script lives outside the standard location. |
 | `pipelines.skill_improvement` | `true`, `false` | `true` | When `true`: propose learning capture after meaningful tasks. When `false`: silent. Included in every profile except `custom`. |
 | `roadmap.quality_cadence` | `end_of_roadmap`, `per_phase`, `per_step` | `end_of_roadmap` | When `/roadmap execute` runs the project's quality pipeline. Default skips per-step / per-phase runs and gates only the final archival. `per_phase` runs once after every phase; `per_step` is the legacy verbose mode. Step checkboxes and the dashboard are always updated regardless. `verify-before-complete` still requires fresh output before any "roadmap complete" claim. |

package/.agent-src/templates/scripts/work_engine/hook_bootstrap.py CHANGED Viewed

@@ -17,8 +17,6 @@ from .hooks import HookRegistry
 from .hooks.builtin import (
     ChatHistoryAppendHook,
     ChatHistoryHaltAppendHook,
-    ChatHistoryHeartbeatHook,
-    ChatHistoryTurnCheckHook,
     DecisionTraceHook,
     DirectiveSetGuardHook,
     HaltSurfaceAuditHook,
@@ -74,12 +72,16 @@ def _build_hook_registry(args: argparse.Namespace) -> HookRegistry:
 def _register_chat_history_hooks(
     registry: HookRegistry, settings: HookSettings,
 ) -> None:
-    """Register the four chat-history hooks bound to the configured script."""
+    """Register the structural chat-history hooks bound to the configured script.
+    Hook-only contract (post road-to-chat-history-hook-only): only the
+    append + halt-append hooks remain; cooperative ``turn-check`` /
+    ``heartbeat`` hooks were removed when the cooperative always-rules
+    were retired.
+    """
     script = Path(settings.chat_history_script)
-    ChatHistoryTurnCheckHook(script).register(registry)
     ChatHistoryAppendHook(script).register(registry)
     ChatHistoryHaltAppendHook(script).register(registry)
-    ChatHistoryHeartbeatHook(script).register(registry)
 __all__ = ["_build_hook_registry", "_register_chat_history_hooks"]

package/.agent-src/templates/scripts/work_engine/hooks/__init__.py CHANGED Viewed

@@ -22,8 +22,6 @@ from __future__ import annotations
 from .builtin import (
     ChatHistoryAppendHook,
     ChatHistoryHaltAppendHook,
-    ChatHistoryHeartbeatHook,
-    ChatHistoryTurnCheckHook,
     DecisionTraceHook,
     DirectiveSetGuardHook,
     HaltSurfaceAuditHook,
@@ -40,8 +38,6 @@ from .runner import HookRunner
 __all__ = [
     "ChatHistoryAppendHook",
     "ChatHistoryHaltAppendHook",
-    "ChatHistoryHeartbeatHook",
-    "ChatHistoryTurnCheckHook",
     "DecisionTraceHook",
     "DirectiveSetGuardHook",
     "HaltSurfaceAuditHook",

package/.agent-src/templates/scripts/work_engine/hooks/builtin/__init__.py CHANGED Viewed

@@ -13,8 +13,6 @@ from __future__ import annotations
 from .chat_history_append import ChatHistoryAppendHook
 from .chat_history_halt_append import ChatHistoryHaltAppendHook
-from .chat_history_heartbeat import ChatHistoryHeartbeatHook
-from .chat_history_turn_check import ChatHistoryTurnCheckHook
 from .decision_trace import DecisionTraceHook
 from .directive_set_guard import DirectiveSetGuardHook
 from .halt_surface_audit import HaltSurfaceAuditHook
@@ -25,8 +23,6 @@ from .trace import TraceHook
 __all__ = [
     "ChatHistoryAppendHook",
     "ChatHistoryHaltAppendHook",
-    "ChatHistoryHeartbeatHook",
-    "ChatHistoryTurnCheckHook",
     "DecisionTraceHook",
     "DirectiveSetGuardHook",
     "HaltSurfaceAuditHook",

package/.agent-src/templates/scripts/work_engine/hooks/builtin/_chat_history_base.py CHANGED Viewed

@@ -12,9 +12,6 @@ import sys
 from pathlib import Path
 from typing import Callable, Sequence
-from ..context import HookContext
-from ..exceptions import HookError
 ProcessRunner = Callable[[Sequence[str]], "subprocess.CompletedProcess[str]"]
 """Callable that runs a subprocess. Production default: ``_default_runner``."""
@@ -28,65 +25,24 @@ def _default_runner(cmd: Sequence[str]) -> "subprocess.CompletedProcess[str]":
     return subprocess.run(list(cmd), capture_output=True, text=True, check=False)
-def _derive_first_user_msg(ctx: HookContext) -> str | None:
-    """Pull a stable first-user-msg out of the available context.
+class _ChatHistoryHookBase:
+    """Shared plumbing — script path and runner.
-    CLI-layer events carry ``ctx.work`` (the v1 envelope); dispatcher-layer
-    events (``before_step`` / ``after_step`` / ``on_halt``) carry only
-    ``ctx.delivery`` (the legacy :class:`DeliveryState`). Both shapes feed
-    the same ``id: title`` / ``raw`` derivation so chat-history entries
-    stay stable across the lifecycle. Returns ``None`` when the shape is
-    unknown — callers raise ``HookError`` so the runner converts it to
-    a warning.
+    Schema v4 derives session attribution from the platform ``session_id``
+    (passed by the platform-hook dispatcher), not from a derived
+    first-user-msg. work-engine internal hooks have no platform session
+    in scope, so they omit ``--session-id`` and entries land in the
+    ``<unknown>`` session bucket.
     """
-    work = ctx.work
-    if work is not None and getattr(work, "input", None) is not None:
-        inp = work.input
-        data = getattr(inp, "data", None) or {}
-        kind = getattr(inp, "kind", None)
-        if kind == "prompt":
-            raw = data.get("raw")
-            if raw:
-                return str(raw)
-        elif kind == "ticket":
-            joined = _ticket_msg(data)
-            if joined:
-                return joined
-    delivery = ctx.delivery
-    if delivery is not None:
-        ticket = getattr(delivery, "ticket", None) or {}
-        joined = _ticket_msg(ticket)
-        if joined:
-            return joined
-    return None
-def _ticket_msg(ticket: dict) -> str:
-    ticket_id = ticket.get("id") or ""
-    title = ticket.get("title") or ""
-    return f"{ticket_id}: {title}".strip(": ").strip()
-class _ChatHistoryHookBase:
-    """Shared plumbing — script path, runner, and first-msg derivation."""
     def __init__(
         self,
         script_path: Path,
         *,
         runner: ProcessRunner | None = None,
-        first_user_msg: str | None = None,
     ) -> None:
         self.script_path = Path(script_path)
         self._runner = runner or _default_runner
-        self._fixed_msg = first_user_msg
-    def _resolve_msg(self, ctx: HookContext) -> str:
-        msg = self._fixed_msg or _derive_first_user_msg(ctx)
-        if not msg:
-            raise HookError("chat-history hook: cannot derive first-user-msg")
-        return msg
     def _invoke(self, *args: str) -> "subprocess.CompletedProcess[str]":
         cmd = [sys.executable, str(self.script_path), *args]

package/.agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_append.py CHANGED Viewed

@@ -29,10 +29,9 @@ class ChatHistoryAppendHook(_ChatHistoryHookBase):
         result = ctx.result
         if result is None or getattr(result, "outcome", None) != Outcome.SUCCESS:
             return
-        msg = self._resolve_msg(ctx)
         payload: dict[str, Any] = {"step": ctx.step_name or "<unknown>"}
         proc = self._invoke(
-            "append", "--first-user-msg", msg,
+            "append",
             "--type", "phase", "--json", json.dumps(payload),
         )
         if proc.returncode != EXIT_OK:

package/.agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_halt_append.py CHANGED Viewed

@@ -22,7 +22,6 @@ class ChatHistoryHaltAppendHook(_ChatHistoryHookBase):
         registry.register(HookEvent.ON_HALT, self._on_halt)
     def _on_halt(self, ctx: HookContext) -> None:
-        msg = self._resolve_msg(ctx)
         questions: list[str] = []
         if ctx.result is not None:
             questions = list(getattr(ctx.result, "questions", []) or [])
@@ -30,7 +29,7 @@ class ChatHistoryHaltAppendHook(_ChatHistoryHookBase):
             questions = list(getattr(ctx.delivery, "questions", []) or [])
         payload = {"step": ctx.step_name or "<unknown>", "questions": questions}
         proc = self._invoke(
-            "append", "--first-user-msg", msg,
+            "append",
             "--type", "decision", "--json", json.dumps(payload),
         )
         if proc.returncode != EXIT_OK:

package/.agent-src/templates/scripts/work_engine/hooks/builtin/memory_visibility.py CHANGED Viewed

@@ -4,10 +4,9 @@ Implements the producer side of
 ``docs/contracts/memory-visibility-v1.md``: derive ``asks/hits/ids``
 from ``state.memory`` and thread the rendered line into
 ``state.report`` so the agent's reply naturally carries the memory
-heartbeat.
+visibility marker.
-Fires on ``before_save`` for the same reason as
-``ChatHistoryHeartbeatHook``: ``cli._sync_back`` runs between
+Fires on ``before_save``: ``cli._sync_back`` runs between
 ``after_dispatch`` and ``before_save`` and reassigns
 ``work.report = delivery.report``. A line written on
 ``after_dispatch`` would be overwritten before ``_save``; firing on