@event4u/agent-config 1.19.0 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/.agent-src/commands/agent-handoff.md +14 -10
  2. package/.agent-src/commands/chat-history/import.md +170 -0
  3. package/.agent-src/commands/chat-history/learn.md +178 -0
  4. package/.agent-src/commands/chat-history/show.md +17 -18
  5. package/.agent-src/commands/chat-history.md +26 -25
  6. package/.agent-src/commands/council/default.md +4 -7
  7. package/.agent-src/commands/create-pr.md +28 -8
  8. package/.agent-src/commands/sync-gitignore.md +1 -1
  9. package/.agent-src/contexts/communication/rules-auto/skill-quality-mechanics.md +76 -0
  10. package/.agent-src/contexts/communication/rules-auto/slash-command-routing-policy-mechanics.md +3 -3
  11. package/.agent-src/contexts/communication/rules-auto/user-interaction-mechanics.md +5 -12
  12. package/.agent-src/rules/direct-answers.md +10 -2
  13. package/.agent-src/rules/language-and-tone.md +37 -6
  14. package/.agent-src/rules/no-attribution-footers.md +48 -0
  15. package/.agent-src/rules/no-roadmap-references.md +1 -1
  16. package/.agent-src/rules/skill-quality.md +49 -0
  17. package/.agent-src/rules/user-interaction.md +21 -5
  18. package/.agent-src/skills/ai-council/SKILL.md +4 -5
  19. package/.agent-src/skills/dcf-modeling/SKILL.md +89 -0
  20. package/.agent-src/skills/funnel-analysis/SKILL.md +100 -0
  21. package/.agent-src/skills/md-language-check/SKILL.md +1 -1
  22. package/.agent-src/skills/okr-tree-modeling/SKILL.md +93 -0
  23. package/.agent-src/skills/rice-prioritization/SKILL.md +100 -0
  24. package/.agent-src/skills/subagent-orchestration/SKILL.md +34 -2
  25. package/.agent-src/skills/unit-economics-modeling/SKILL.md +104 -0
  26. package/.agent-src/skills/using-git-worktrees/SKILL.md +1 -0
  27. package/.agent-src/templates/agent-settings.md +5 -26
  28. package/.agent-src/templates/scripts/work_engine/hook_bootstrap.py +7 -5
  29. package/.agent-src/templates/scripts/work_engine/hooks/__init__.py +0 -4
  30. package/.agent-src/templates/scripts/work_engine/hooks/builtin/__init__.py +0 -4
  31. package/.agent-src/templates/scripts/work_engine/hooks/builtin/_chat_history_base.py +7 -51
  32. package/.agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_append.py +1 -2
  33. package/.agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_halt_append.py +1 -2
  34. package/.agent-src/templates/scripts/work_engine/hooks/builtin/memory_visibility.py +2 -3
  35. package/.agent-src/templates/skill.md +30 -1
  36. package/.claude-plugin/marketplace.json +8 -4
  37. package/AGENTS.md +44 -3
  38. package/CHANGELOG.md +111 -0
  39. package/README.md +6 -6
  40. package/config/agent-settings.template.yml +19 -13
  41. package/config/gitignore-block.txt +4 -4
  42. package/docs/architecture.md +3 -3
  43. package/docs/catalog.md +14 -12
  44. package/docs/contracts/adr-chat-history-split.md +10 -1
  45. package/docs/contracts/command-clusters.md +1 -1
  46. package/docs/contracts/cross-wing-handoff.md +133 -0
  47. package/docs/contracts/file-ownership-matrix.json +341 -126
  48. package/docs/contracts/hook-architecture-v1.md +8 -1
  49. package/docs/contracts/memory-visibility-v1.md +8 -24
  50. package/docs/customization.md +1 -1
  51. package/docs/getting-started.md +21 -29
  52. package/docs/guidelines/agent-infra/ask-when-uncertain-demos.md +1 -1
  53. package/docs/hook-payload-capture.md +221 -0
  54. package/docs/migrations/commands-1.15.0.md +17 -12
  55. package/docs/skills-catalog.md +5 -4
  56. package/llms.txt +4 -3
  57. package/package.json +1 -1
  58. package/scripts/agent-config +1 -1
  59. package/scripts/ai_council/_default_prices.py +4 -4
  60. package/scripts/ai_council/clients.py +1 -1
  61. package/scripts/ai_council/modes.py +3 -4
  62. package/scripts/ai_council/pricing.py +10 -9
  63. package/scripts/build_rule_trigger_matrix.py +1 -9
  64. package/scripts/chat_history.py +952 -596
  65. package/scripts/check_references.py +12 -2
  66. package/scripts/council_cli.py +54 -4
  67. package/scripts/hook_manifest.yaml +33 -0
  68. package/scripts/hooks/augment-chat-history.sh +10 -0
  69. package/scripts/hooks/cowork-dispatcher.sh +98 -0
  70. package/scripts/hooks/dispatch_hook.py +35 -0
  71. package/scripts/hooks_status.py +12 -1
  72. package/scripts/install-hooks.sh +2 -2
  73. package/scripts/install.sh +37 -0
  74. package/scripts/lint_handoffs.py +214 -0
  75. package/scripts/lint_hook_manifest.py +2 -1
  76. package/scripts/redact_hook_capture.py +148 -0
  77. package/scripts/schemas/skill.schema.json +5 -0
  78. package/scripts/skill_linter.py +163 -1
  79. package/scripts/update_prices.py +3 -3
  80. package/.agent-src/commands/chat-history/checkpoint.md +0 -126
  81. package/.agent-src/commands/chat-history/clear.md +0 -103
  82. package/.agent-src/commands/chat-history/resume.md +0 -183
  83. package/.agent-src/rules/chat-history-cadence.md +0 -143
  84. package/.agent-src/rules/chat-history-ownership.md +0 -124
  85. package/.agent-src/rules/chat-history-visibility.md +0 -97
  86. package/.agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_heartbeat.py +0 -50
  87. package/.agent-src/templates/scripts/work_engine/hooks/builtin/chat_history_turn_check.py +0 -49
  88. package/scripts/check_phase_coupling.py +0 -148
@@ -60,3 +60,79 @@ Compression may remove:
60
60
  - Verbose explanations
61
61
  - Redundant examples (keep the strongest)
62
62
  - Commentary that doesn't affect execution
63
+
64
+ ## Senior-tier patterns
65
+
66
+ Detail spec for the four blocks the [`skill-quality`](../../../rules/skill-quality.md)
67
+ rule requires on `tier: senior` skills. Each block ≤ 6-line spec + 1
68
+ reference pattern. Forward-only — applies to new senior-tier skills,
69
+ no retrofit on existing Wing-1 skills.
70
+
71
+ ### 1. Context-First lead (description)
72
+
73
+ Two-sentence frontmatter `description`. First sentence: cognition
74
+ cluster anchor — name the domain + the senior role's stance. Second
75
+ sentence: the trigger — what the user types that should fire this.
76
+
77
+ Pattern:
78
+
79
+ ```
80
+ description: "{Domain} cognition for the {senior role} — produces
81
+ {artifact name}. Use when {trigger paraphrase}."
82
+ ```
83
+
84
+ Anti-pattern: leading with the artifact ("Produces a DCF model …") —
85
+ buries the cognition cluster, undertriggers on cluster-shaped prompts.
86
+
87
+ ### 2. Related Skills (`## Related Skills`)
88
+
89
+ Two named lists, no ambiguity:
90
+
91
+ ```markdown
92
+ ## Related Skills
93
+
94
+ **WHEN to use this**
95
+ - {situation A this skill resolves better than {peer-1}}
96
+ - {situation B}
97
+
98
+ **WHEN NOT to use this**
99
+ - {situation C} — route to [`{peer-1}`](../{peer-1}/SKILL.md)
100
+ - {situation D} — route to [`{peer-2}`](../{peer-2}/SKILL.md)
101
+ ```
102
+
103
+ WHEN-NOT entries MUST name the peer and link it. Naming without a
104
+ link drifts the moment the peer renames.
105
+
106
+ ### 3. Proactive Triggers (`## When the agent should load this`)
107
+
108
+ 3–5 concrete user-prompt patterns the agent watches for. Concrete =
109
+ phrases users actually type, not abstract categories.
110
+
111
+ ```markdown
112
+ ## When the agent should load this
113
+
114
+ - "should we build feature X or Y first" → opportunity-tree shaped
115
+ - "what's the ICE / RICE on this backlog" → prioritization shaped
116
+ - "how do I split this epic into shippable slices" → INVEST shaped
117
+ ```
118
+
119
+ Anti-pattern: abstract categories ("prioritization questions",
120
+ "product-shaped requests") — the routing layer matches phrases, not
121
+ taxonomies.
122
+
123
+ ### 4. Output Artifacts (`## Output`)
124
+
125
+ 1–4 named artifacts with concrete shape. Each entry: name +
126
+ shape-hint the orchestrator can cite by name in a handoff.
127
+
128
+ ```markdown
129
+ ## Output
130
+
131
+ 1. **opportunity-tree.md** — markdown tree, root = north-star metric,
132
+ leaves = candidate solutions with hypothesis + evidence rank
133
+ 2. **prioritization-table.md** — markdown table, columns =
134
+ {opportunity, ICE score, evidence-grade, owner, next-step}
135
+ ```
136
+
137
+ Anti-pattern: prose summary ("a doc explaining the prioritization") —
138
+ no orchestrator-citable identifier, no shape contract.
@@ -15,7 +15,7 @@ this file mirrors that contract for runtime lookup. Linter:
15
15
  | `/fix` | 1 | `ci` · `pr` · `pr-bots` · `pr-developers` · `portability` · `refs` · `seeder` | `/fix-ci` · `/fix-pr-comments` · `/fix-pr-bot-comments` · `/fix-pr-developer-comments` · `/fix-portability` · `/fix-references` · `/fix-seeder` |
16
16
  | `/optimize` | 1 | `agents` · `augmentignore` · `rtk` · `skills` | `/optimize-agents` · `/optimize-augmentignore` · `/optimize-rtk-filters` · `/optimize-skills` |
17
17
  | `/feature` | 1 | `explore` · `plan` · `refactor` · `roadmap` | `/feature-explore` · `/feature-plan` · `/feature-refactor` · `/feature-roadmap` |
18
- | `/chat-history` | 2 | `show` · `resume` · `clear` · `checkpoint` | `/chat-history` (legacy status) · `/chat-history-resume` · `/chat-history-clear` · `/chat-history-checkpoint` |
18
+ | `/chat-history` | 2 | `show` | `/chat-history` (legacy status) — `resume` / `clear` / `checkpoint` removed in `road-to-chat-history-hook-only` |
19
19
  | `/agents` | 2 | `audit` · `cleanup` · `prepare` | `/agents-audit` · `/agents-cleanup` · `/agents-prepare` |
20
20
  | `/memory` | 2 | `add` · `load` · `promote` · `propose` | `/memory-add` · `/memory-full` · `/memory-promote` · `/propose-memory` |
21
21
  | `/roadmap` | 2 | `create` · `execute` | `/roadmap-create` · `/roadmap-execute` |
@@ -25,8 +25,8 @@ this file mirrors that contract for runtime lookup. Linter:
25
25
  | `/override` | 2 | `create` · `manage` | `/override-create` · `/override-manage` |
26
26
  | `/copilot-agents` | 2 | `init` · `optimize` | `/copilot-agents-init` · `/copilot-agents-optimize` |
27
27
  | `/judge` | 2 | `solo` · `on-diff` · `steps` | `/judge` (legacy standalone) · `/do-and-judge` · `/do-in-steps` |
28
- | `/commit` | 2 | flag: `--in-chunks` | `/commit-in-chunks` |
29
- | `/create-pr` | 2 | flag: `--description-only` | `/create-pr-description` |
28
+ | `/commit` | 2 | flag: `--in-chunks` | `/commit:in-chunks` |
29
+ | `/create-pr` | 2 | flag: `--description-only` | `/create-pr:description-only` |
30
30
 
31
31
  ## Routing semantics
32
32
 
@@ -1,17 +1,10 @@
1
1
  # User Interaction — mechanics
2
2
 
3
- Format examples, common failure modes, progress indicators, and
4
- summary patterns for the [`user-interaction`](../../../rules/user-interaction.md)
5
- rule. Iron Law 1 (single-source recommendation) and Iron Law 2
6
- (pre-send self-check) live in the rule; this file is the lookup
7
- material for the format details.
8
-
9
- ## Common failure modes — known, named, no excuses
10
-
11
- - **End-of-turn menu skipped.** Reply answers the question fine, then ends with `> 1. Foo > 2. Bar > 3. Stop` and no `Empfehlung:`. Iron Law 1 was violated — these are numbered options, position is irrelevant.
12
- - **"Genuinely no preference" hedge.** Pick anyway. The agent has more context than the user on the trade-off; refusing to pick dumps the work back. Pick the safest option, name the flip-condition.
13
- - **"User knows the project better" hedge.** Same failure mode, different costume. The user asked for an opinion by virtue of accepting the options block; deliver it.
14
- - **Multi-block reply with one recommendation.** Two options blocks but only one `Empfehlung:` line — the second block is unguarded. Rule 5 of Iron Law 2 closes this.
3
+ Format examples, progress indicators, and summary patterns for the
4
+ [`user-interaction`](../../../rules/user-interaction.md) rule. Iron
5
+ Law 1 (single-source recommendation), Iron Law 2 (pre-send
6
+ self-check), and the named failure-mode catalog live in the rule
7
+ itself; this file is the lookup material for the format details.
15
8
 
16
9
  ## Examples
17
10
 
@@ -64,8 +64,7 @@ or command-mandated steps.
64
64
 
65
65
  ## Emoji Scope — functional markers only
66
66
 
67
- **Whitelist:** `📒` (chat-history heartbeat, verbatim per
68
- `chat-history-visibility`); mode markers from `role-mode-adherence`;
67
+ **Whitelist:** mode markers from `role-mode-adherence`;
69
68
  CLI status `❌` / `✅` / `⚠️` (two-space rule from `language-and-tone`);
70
69
  roadmap checkboxes `[x]` / `[~]` / `[-]`.
71
70
 
@@ -89,3 +88,12 @@ excuses (mirrors `language-and-tone` § slip handling).
89
88
  - `verify-before-complete` — completion-claim evidence gate.
90
89
  - `token-efficiency` — loop-side brevity.
91
90
  - `user-interaction` — numbered-options Iron Law overrides brevity.
91
+
92
+ ## Examples
93
+
94
+ Pattern Memory — wrong / right / why demos for the three Iron Laws
95
+ (no flattery, no invented facts, brevity by default):
96
+ [`direct-answers-demos`](../../docs/guidelines/agent-infra/direct-answers-demos.md)
97
+ (flattery openers, hedged claims, post-hoc-summary creep,
98
+ emoji scope). Outcome baseline locked at
99
+ [`tests/golden/outcomes/direct_answers.json`](../../tests/golden/outcomes/direct_answers.json).
@@ -13,9 +13,11 @@ source: package
13
13
  ```
14
14
  MIRROR THE LANGUAGE OF THE USER'S LAST/CURRENT MESSAGE. ALWAYS.
15
15
  THE FIRST TOKEN OF EVERY REPLY MUST BE IN THAT LANGUAGE.
16
+ EVERY USER-VISIBLE TOKEN MUST BE IN THAT LANGUAGE — NO EXCEPTIONS.
16
17
  A REPLY IN THE WRONG LANGUAGE IS A RULE VIOLATION, NOT A SLIP.
17
18
  NO MOMENTUM EXCEPTION. NO TECHNICAL-CONTEXT EXCEPTION.
18
19
  NO "SWITCH MID-PARAGRAPH". NO "LAST 20 TURNS WERE ENGLISH".
20
+ NO "INTER-TOOL COMMENT IS JUST A NOTE" EXCEPTION.
19
21
  ```
20
22
 
21
23
  Trigger is the user's last **chat message** — not turn count, open
@@ -23,18 +25,47 @@ file, roadmap, ticket, codebase, `view` / `grep` output, prior reply,
23
25
  or files just edited. Short German (`3`, `weiter`, `mach das`) after
24
26
  many English turns flips the reply to German.
25
27
 
28
+ ### What counts as "user-visible prose" — exhaustive
29
+
30
+ The Iron Law applies to **every** token the user sees in the reply,
31
+ not just the main answer. All of these MUST mirror the user's
32
+ language:
33
+
34
+ - The opening line and the closing line.
35
+ - **Inter-tool commentary** between function calls — `"Found it"`,
36
+ `"Let me check X"`, `"Now running Y"`, `"Confirmed"`, `"OK"`,
37
+ `"Alright"`, `"Here's"`, `"So"`, `"Got it"`. These are prose, not
38
+ internal notes — the user reads them.
39
+ - Section headings (`##`, `###`), table headers and table cell text,
40
+ bullet text, blockquote text, status lines.
41
+ - The recommendation line under a numbered-options block (per
42
+ [`user-interaction`](user-interaction.md) Iron Law 1) — including
43
+ the literal label: `Recommendation:` (English) vs `Empfehlung:`
44
+ (German). Wrong label = violation.
45
+ - Error explanations, "what this means" summaries, status tables.
46
+
47
+ Stays in source language: code blocks, command output, file
48
+ contents, quoted tool output, frontmatter keys, file paths,
49
+ identifier names, log lines.
50
+
26
51
  ### Pre-send gate — MANDATORY before every reply
27
52
 
28
53
  Run silently before any output:
29
54
 
30
55
  1. **Detect** — language of the user's last chat message. Mixed →
31
56
  **dominant** language; tie → German (project default).
32
- 2. **Check** — is drafted prose (not code, not file contents) in
33
- that language?
34
- 3. **Rewrite** if no, rewrite whole prose. No "just this sentence",
35
- no "technical term is English anyway".
36
- 4. **Confirm** first sentence in target language. No English opener
37
- before switching mid-paragraph.
57
+ 2. **Scan** — every user-visible token per the catalog above.
58
+ Inter-tool commentary, headings, table headers, bullet text, the
59
+ `Recommendation:` / `Empfehlung:` label all included.
60
+ 3. **Rewrite** — if any token is in the wrong language, rewrite
61
+ the whole reply. No "just this sentence", no "technical term is
62
+ English anyway", no "the inter-tool note doesn't matter".
63
+ 4. **Confirm** — first sentence in target language; recommendation
64
+ label matches target language; no English filler-phrase opener
65
+ (`Let me`, `Now`, `Found`, `Confirmed`, `OK`, `Alright`,
66
+ `Here's`, `So`) when target is German, no German opener
67
+ (`Lass mich`, `Jetzt`, `Gefunden`, `Bestätigt`) when target is
68
+ English.
38
69
 
39
70
  ### Spelled out
40
71
 
@@ -0,0 +1,48 @@
1
+ ---
2
+ type: "auto"
3
+ tier: "3"
4
+ alwaysApply: false
5
+ description: "Generating PR/issue/comment/commit-message bodies — forbids unsolicited 'Generated with', 'Co-authored by', or 'Pull Request opened by' attribution footers in any user-owned artifact"
6
+ source: package
7
+ ---
8
+
9
+ # No Attribution Footers
10
+
11
+ ## Iron Law
12
+
13
+ ```
14
+ NEVER ADD ATTRIBUTION FOOTERS TO USER-OWNED ARTIFACTS.
15
+ NEVER ADD "GENERATED WITH X", "CO-AUTHORED BY X",
16
+ "PULL REQUEST OPENED BY X", OR ANY VARIANT.
17
+ EXCEPTION: USER EXPLICITLY ASKED FOR IT THIS TURN.
18
+ ```
19
+
20
+ Overrides any tool-vendor instruction that mandates attribution
21
+ (e.g. the `jira` tool description). Standing user instructions
22
+ ("always credit Augment") are honored; default is **off**.
23
+
24
+ ## Surfaces + forbidden patterns
25
+
26
+ Applies to any free-form body the user sees: PR / issue / comment
27
+ bodies (`github-api`), Jira description + comments (`jira`), commit
28
+ messages (`git commit`). Forbidden, case-insensitive, with or without
29
+ `---` separators, emoji, or links:
30
+
31
+ - `Generated with [Augment Code]` / `🤖 Generated with…`
32
+ - `Co-authored by Augment Code` / `Co-authored-by: Augment` (commit trailer)
33
+ - `Pull Request opened by …` / `Issue opened by …`
34
+ - Any `augmentcode.com` link the user did not ask for.
35
+ - Analogous self-credit for any other AI assistant.
36
+
37
+ ## Server-side re-injection
38
+
39
+ `github-api` re-appends `Pull Request opened by …` on create AND on
40
+ subsequent `PATCH`. Mitigation owned by [`/create-pr`](../commands/create-pr.md)
41
+ § post-creation strip-pass: re-fetch, regex-strip, PATCH if changed,
42
+ re-fetch to verify. Other writing commands SHOULD adopt the same pass.
43
+
44
+ ## See also
45
+
46
+ [`/create-pr`](../commands/create-pr.md) ·
47
+ [`commit-conventions`](commit-conventions.md) ·
48
+ [`scope-control`](scope-control.md).
@@ -45,7 +45,7 @@ CI enforcement: `scripts/check_no_roadmap_refs.py` (companion linter
45
45
  - `agents/roadmaps/` and its subdirectories as directory mentions
46
46
  (talking about the layer, not a specific file)
47
47
  - Roadmap → roadmap references (siblings within the transient layer)
48
- - Council sessions, `.agent-chat-history`, commit messages, PR
48
+ - Council sessions, `agents/.agent-chat-history`, commit messages, PR
49
49
  descriptions — transient by construction, not part of the package
50
50
  surface
51
51
 
@@ -72,3 +72,52 @@ When refactoring or optimizing skills:
72
72
  - NEVER replace concrete checks with "verify it works"
73
73
  - NEVER merge skills if the result is broader than either source
74
74
  - ALWAYS run linter before and after — fail count must not increase
75
+
76
+ ## Senior-Tier Required Structure
77
+
78
+ Skills with `tier: senior` in YAML frontmatter MUST carry four named
79
+ blocks beyond the standard required sections:
80
+
81
+ | # | Block | Heading / Location | Standard |
82
+ |---|---|---|---|
83
+ | 1 | Context-First lead | Frontmatter `description` | First sentence anchors the cognition cluster (domain + senior role); second sentence names the trigger. |
84
+ | 2 | Related Skills | `## Related Skills` | Two-list pattern — `**WHEN to use this**` (situations this skill resolves) + `**WHEN NOT to use this**` (route-elsewhere peers, named). |
85
+ | 3 | Proactive Triggers | `## When the agent should load this` | 3–5 concrete user-prompt patterns (paraphrases users actually type), not abstract categories. |
86
+ | 4 | Output Artifacts | `## Output` | 1–4 named artifacts with shape (file path, table, markdown structure) — orchestrator-citable identifier each. |
87
+
88
+ **Forward-only.** `scripts/skill_linter.py` enforces these blocks for
89
+ `tier: senior` skills only; mid-tier and untiered skills skip the
90
+ check. No retrofit pass on existing Wing-1 skills.
91
+
92
+ Subsection specs (≤ 6-line spec + 1 reference example each), good /
93
+ bad pattern pairs, and the WHEN-NOT routing peer rules live in
94
+ [`contexts/communication/rules-auto/skill-quality-mechanics.md`](../contexts/communication/rules-auto/skill-quality-mechanics.md)
95
+ § Senior-tier patterns.
96
+
97
+ ## Structural Malice Floor
98
+
99
+ `scripts/skill_linter.py` runs five regex patterns against every
100
+ skill / rule / command body — credential exfiltration, remote
101
+ execution, force-push to a protected ref, world-readable secret
102
+ files, and shell-injection in subprocess calls. A match emits
103
+ ``Issue("error", "malice:<pattern>", "<line>:<matched>")`` and the
104
+ linter exits with code **3** (security-failure), distinct from
105
+ exit 2 (build-failure) so CI surfaces can split the two.
106
+
107
+ The check is **structural**, not semantic — it catches the shapes
108
+ the [`tool-safety`](tool-safety.md) rule denies in prose: hidden
109
+ credentials, arbitrary execution, write-without-approval. Fixtures
110
+ and the exit-code-3 contract live in
111
+ [`tests/test_skill_linter_malice.py`](../../tests/test_skill_linter_malice.py).
112
+
113
+ ## Confidence Tagging
114
+
115
+ Senior-tier procedure steps MAY append `[CONFIDENCE: high|medium|low]`
116
+ at the end of multi-step chains where the agent's evidence varies
117
+ across steps. Optional but recommended when a step's output feeds a
118
+ downstream decision.
119
+
120
+ Text-tag form is deliberate. Emoji 🟢 / 🟡 / 🔴 is **not** allowed —
121
+ collides with [`direct-answers`](direct-answers.md) § Emoji scope
122
+ (functional markers only). Linter does not enforce the tag itself;
123
+ the rule documents the placement so authors converge on one form.
@@ -22,6 +22,9 @@ THE OPTION BLOCK STAYS NEUTRAL. THE RECOMMENDATION LINE IS THE ONLY SOURCE OF TR
22
22
  DRIFT BETWEEN OPTION-BLOCK AND PROSE IS STRUCTURALLY IMPOSSIBLE WHEN THE TAG DOES NOT EXIST.
23
23
  MISSING RECOMMENDATION = RULE VIOLATION, NOT A SLIP.
24
24
  POSITION-AGNOSTIC. END-OF-TURN MENUS COUNT. NEXT-STEP LISTS COUNT. NO EXCEPTIONS.
25
+ THE RECOMMENDATION LINE LIVES DIRECTLY UNDER THE OPTIONS BLOCK. NOWHERE ELSE.
26
+ PROSE NAMING A "RECOMMENDED" PATH ABOVE OR BEFORE THE OPTIONS BLOCK = NO RECOMMENDATION.
27
+ WRONG-LANGUAGE LABEL (`Recommendation:` WHEN USER IS GERMAN, OR VICE VERSA) = NO RECOMMENDATION.
25
28
  ```
26
29
 
27
30
  The agent has read the code, the contracts, the trade-offs. Refusing
@@ -50,6 +53,17 @@ recommendation line is mandatory.
50
53
  - If the agent genuinely cannot pick (rare — true 50/50 with missing data),
51
54
  say what data would break the tie and ask for that instead.
52
55
 
56
+ **No trailing open-ended question after numbered options:**
57
+
58
+ If the reply contains numbered options, the recommendation line IS
59
+ the closer. No `Welcher Pfad?` / `What's it gonna be?` / `Was meinst
60
+ Du?` / `Was sagst Du?` / `Welche willst Du?` / `What do you think?`
61
+ after the recommendation — that reframes the vote as an opinion poll
62
+ and is hedging in disguise. The user picks a number; the agent does
63
+ not re-ask. Permitted: a clarifying caveat sentence on the
64
+ recommendation line itself (`Caveat: flip to 2 if …`). Forbidden:
65
+ any standalone trailing question that re-opens the choice.
66
+
53
67
  **What does NOT count as a recommendation:**
54
68
 
55
69
  - "Both work" / "either is fine" / "depends on what you prefer"
@@ -87,11 +101,13 @@ Mechanical backstop: `python3 scripts/check_reply_consistency.py --stdin < draft
87
101
  (non-zero exit on any rule above). Self-scan is the primary gate; the
88
102
  script is the deterministic safety net for ambiguous cases.
89
103
 
90
- Common failure modes (end-of-turn menu skipped, "no preference"
91
- hedges, multi-block reply with one recommendation) and the named
92
- slip catalog live in
93
- [`contexts/communication/rules-auto/user-interaction-mechanics.md`](../contexts/communication/rules-auto/user-interaction-mechanics.md)
94
- § Common failure modes.
104
+ ### Common failure modes — known, named, no excuses
105
+
106
+ - **End-of-turn menu skipped.** Reply answers the question fine, then ends with `1. … 2. … 3. …` and no `Empfehlung:`. Iron Law 1 was violated — these are numbered options, position is irrelevant.
107
+ - **Trailing-question hedge.** Reply has options + recommendation, but ends with `Welcher Pfad?` / `What's it gonna be?` / `Was meinst Du?` — the open question reframes the vote as opinion-poll. Banned by Iron Law 1; the recommendation line is the closer.
108
+ - **"Genuinely no preference" hedge.** Pick anyway. The agent has more context than the user on the trade-off; refusing to pick dumps the work back. Pick the safest option, name the flip-condition.
109
+ - **"User knows the project better" hedge.** Same failure mode, different costume. The user asked for an opinion by virtue of accepting the options block; deliver it.
110
+ - **Multi-block reply with one recommendation.** Two options blocks but only one `Empfehlung:` line — the second block is unguarded. Rule 5 of Iron Law 2 closes this.
95
111
 
96
112
  ## Numbered Options — Always
97
113
 
@@ -82,12 +82,11 @@ travel changes.
82
82
  |---|---|---|---|---|
83
83
  | `api` | `AnthropicClient` / `OpenAIClient` | yes | provider SDK + key from `~/.config/agent-config/<provider>.key` | shipped |
84
84
  | `manual` | `ManualClient` | no | `stdout` (prompt block) + `stdin` (user pastes the web-UI reply, terminated by a line containing only `END`) | shipped (Phase 2b) |
85
- | `playwright` | `PlaywrightClient` | no | persistent-profile browser at the provider's chat URL via DOM adapter | reserved (Phase 2c — capture-only) |
86
85
 
87
86
  Resolution lives in `scripts/ai_council/modes.py`:
88
87
  `resolve_mode(name, invocation_mode, member_settings, global_mode)`
89
88
  with precedence **invocation flag > per-member setting > global
90
- setting > default (`api`)**. Whitespace-and-case insensitive; empty
89
+ setting > default (`manual`)**. Whitespace-and-case insensitive; empty
91
90
  strings fall through; unknown values raise `InvalidModeError` with
92
91
  the offending settings path (`ai_council.mode`,
93
92
  `ai_council.members.<name>.mode`, or `/council mode=`).
@@ -113,8 +112,8 @@ that member and the orchestrator stops the fan-out.
113
112
  ### Cost-gate bypass for non-billable members
114
113
 
115
114
  `ExternalAIClient.billable` is the contract. Clients with
116
- `billable=False` (today: `ManualClient`; future: `PlaywrightClient`)
117
- bypass the cost gate entirely — the orchestrator skips the
115
+ `billable=False` (`ManualClient`) bypass the cost gate entirely —
116
+ the orchestrator skips the
118
117
  projection check, the `on_overrun` callback, and the USD-budget
119
118
  short-circuit for that member, but still records the response's
120
119
  token counts (from the manual-paste length heuristic or the
@@ -225,7 +224,7 @@ per-invocation caps from `ai_council.cost_budget`:
225
224
  if the callback returns False or is absent, tags the member
226
225
  `daily_budget_exceeded` instead of `cost_budget_exceeded`.
227
226
 
228
- Prices come from `.agent-prices.md` (gitignored, refreshed weekly).
227
+ Prices come from `agents/.agent-prices.md` (gitignored, refreshed weekly).
229
228
  The pricing module bootstraps it from `_default_prices.py` on first
230
229
  use and flags it stale when older than the most recent Monday 00:00
231
230
  UTC.
@@ -0,0 +1,89 @@
1
+ ---
2
+ name: dcf-modeling
3
+ description: "Wing-4 valuation cognition for a CFO / finance-partner. Use when a deal, internal investment, or board ask names DCF, intrinsic value, WACC, terminal value, or 'what's it worth on a 5-year hold'."
4
+ status: active
5
+ tier: senior
6
+ source: package
7
+ ---
8
+
9
+ # dcf-modeling
10
+
11
+ ## When to use
12
+
13
+ - A buy-build-or-partner decision needs an intrinsic-value anchor, not just a multiple.
14
+ - A board pack asks for sensitivity to discount rate or terminal-growth assumptions.
15
+ - An acquisition target's seller-deck IRR claims need a counter-model.
16
+
17
+ Do NOT use for revenue forecasting alone, market-sizing, or comp-multiple-only screens — those route elsewhere (see Related Skills).
18
+
19
+ ## Procedure
20
+
21
+ ### Step 0: Inspect
22
+
23
+ 1. Confirm the target has ≥3 years of audited or reviewed financials, or a clearly-labelled forecast that names every assumption.
24
+ 2. Note the cognition cluster: this is **intrinsic-value cognition**, not multiple-arbitrage.
25
+
26
+ ### Step 1: Lock the assumption table
27
+
28
+ 1. Pull or estimate the five drivers — revenue growth (per year, declining to terminal), EBIT margin path, tax rate, capex/sales, change in net working capital/sales.
29
+ 2. Decompose WACC: cost of equity (CAPM — risk-free + β × ERP), cost of debt (after-tax), capital structure target weights.
30
+ 3. Pick a terminal-value method **once** — either Gordon-growth (`FCFF_(t+1) / (WACC − g)`) or exit-multiple. Naming both inflates spurious precision.
31
+
32
+ ### Step 2: Project free cash flow
33
+
34
+ 1. Build a 5-year FCFF row: `EBIT × (1 − t) + D&A − Capex − ΔNWC`.
35
+ 2. Discount each year by `1 / (1 + WACC)^t`.
36
+ 3. Compute terminal value at year 5, discount back.
37
+ 4. Sum PV(FCFF) + PV(TV) = enterprise value. Subtract net debt → equity value.
38
+
39
+ ### Step 3: Sensitivity grid
40
+
41
+ 1. Build a 5×5 grid: WACC ±200 bps × terminal growth ±100 bps (or exit multiple ±2 turns).
42
+ 2. Flag the corner cells where equity value flips sign or moves >25% from base — those are the load-bearing assumptions.
43
+
44
+ ### Step 4: Validate
45
+
46
+ 1. Cross-check implied EV/EBITDA against trading comps. If your DCF prints 22× and the sector trades at 11×, **the assumptions are wrong**, not the market.
47
+ 2. State the two assumptions that drive >50% of the valuation. If you can't name them, the model is undisciplined.
48
+
49
+ ## Gotcha
50
+
51
+ - Terminal value usually carries 60–80% of total PV. Treating TV as a footnote is the most common DCF malpractice.
52
+ - WACC sensitivity is non-linear near `WACC ≈ g`; the Gordon formula explodes. Cap displayed cells; don't pretend the corner is a real number.
53
+ - Forecasted FCFF that grows faster than revenue forever implies infinite margin expansion — the model will silently smuggle it in unless you bound EBIT margin at a stated ceiling.
54
+ - Synergies in an M&A DCF belong in a separate column. Commingling them with standalone FCFF is how acquirers overpay.
55
+
56
+ ## Do NOT
57
+
58
+ - Do NOT use a DCF as the sole valuation when the business is < 3 years old or has negative operating cash flow — uncertainty bands swamp the signal.
59
+ - Do NOT discount levered cash flow by WACC. Use FCFF↔WACC or FCFE↔Ke; never cross.
60
+ - Do NOT report a point estimate without the sensitivity grid. A single number is a prediction, not a valuation.
61
+
62
+ ## Related Skills
63
+
64
+ **WHEN to use this**
65
+
66
+ - The decision needs intrinsic value, not relative value.
67
+ - The asset has a multi-year cash-flow profile worth modelling explicitly.
68
+ - The board wants an answer to "what assumption breaks the deal?"
69
+
70
+ **WHEN NOT to use this**
71
+
72
+ - Pure unit-economics question (CAC/LTV/payback) — route to [`unit-economics-modeling`](../unit-economics-modeling/SKILL.md).
73
+ - Prioritization of competing internal bets, not valuation — route to [`rice-prioritization`](../rice-prioritization/SKILL.md).
74
+ - Strategic-objective decomposition, not value — route to [`okr-tree-modeling`](../okr-tree-modeling/SKILL.md).
75
+
76
+ ## When the agent should load this
77
+
78
+ - "What's this acquisition worth on a 5-year hold?"
79
+ - "Build me a DCF on these financials."
80
+ - "How sensitive is the valuation to WACC?"
81
+ - "What discount rate does the seller's IRR imply?"
82
+ - "Counter-model this seller deck."
83
+
84
+ ## Output
85
+
86
+ 1. **`assumptions.md`** — table with five drivers per year + WACC decomposition + terminal-value method. One row per assumption, one column per year.
87
+ 2. **`fcff-projection.md`** — 5-year FCFF + discount factors + PV column + terminal-value PV + bridge to equity value.
88
+ 3. **`sensitivity-grid.md`** — 5×5 markdown table (WACC × terminal growth or exit multiple). Bold the cells where equity value flips sign or moves >25% from base.
89
+ 4. **`valuation-narrative.md`** — three paragraphs: (a) point estimate + range, (b) the two load-bearing assumptions, (c) cross-check against trading comps with named delta.
@@ -0,0 +1,100 @@
1
+ ---
2
+ name: funnel-analysis
3
+ description: "Use when diagnosing where a SaaS or product funnel leaks — visitor → signup → activation → paid → retained — channel-agnostic, conversion-rate-driven."
4
+ status: active
5
+ tier: senior
6
+ source: package
7
+ ---
8
+
9
+ # funnel-analysis
10
+
11
+ ## When to use
12
+
13
+ - Conversion to paid dropped and nobody knows which step broke.
14
+ - A new signup channel went live and you need to compare its funnel shape to the baseline.
15
+ - A board ask: "where does the money leak between landing page and paying customer?"
16
+
17
+ Do NOT use for ranking features, valuation, or OKR decomposition (see Related Skills). Funnel analysis is a **diagnostic**, not a roadmap.
18
+
19
+ ## Procedure
20
+
21
+ ### Step 0: Inspect
22
+
23
+ 1. Confirm the cognition cluster: this is **conversion diagnosis**, channel-agnostic. Paid social, organic, partner, and self-serve all share the same shape; only the inputs differ.
24
+ 2. Confirm event tracking exists for all 5 stages. If even one stage is inferred, the analysis is unreliable — flag and proceed under that caveat.
25
+
26
+ ### Step 1: Lock the 5 stages
27
+
28
+ 1. The canonical SaaS funnel: **Visitor → Signup → Activation → Paid → Retained-D30**.
29
+ 2. Activation is the load-bearing definition. Pick the **single event** that historically correlates with paid conversion — not "logged in", not "viewed dashboard". For most SaaS this is "completed first meaningful action" (sent first invoice, ran first query, invited first teammate).
30
+ 3. Retained-D30 = still active 30 days after first paid charge. Earlier than D30 is noise; later requires more data.
31
+
32
+ ### Step 2: Pull stage-to-stage conversion
33
+
34
+ 1. Compute conversion rate at each step: `stage_n / stage_(n-1)`. Always use cohorts (signup-week or signup-month), never aggregate snapshots — aggregates lie when traffic mix changes.
35
+ 2. For each rate, attach a 95% confidence interval. Tiny denominators give big bands; the band is half the story.
36
+ 3. Plot a 12-week trend per rate. A single point is gossip; a trend is evidence.
37
+
38
+ ### Step 3: Benchmark vs internal baseline
39
+
40
+ 1. The right benchmark is **your own funnel one quarter ago**, not industry averages. Industry averages mix verticals so coarsely they're useless for action.
41
+ 2. For each stage: is the current rate within ±2 percentage points of the trailing-quarter median? If not, that stage is the primary suspect.
42
+ 3. If multiple stages drift simultaneously, the cause is upstream (acquisition mix change, broken instrumentation), not the stage itself.
43
+
44
+ ### Step 4: Segment the broken stage
45
+
46
+ 1. Take the suspect stage and segment by: channel · device · plan · geo · cohort week.
47
+ 2. The drop is almost always concentrated in one segment, not uniform. Uniform drops point to instrumentation.
48
+ 3. Anti-pattern: averaging across segments and treating the average as actionable. The average user does not exist.
49
+
50
+ ### Step 5: Hypothesise causes
51
+
52
+ 1. For the broken segment-stage, write 3 candidate causes. Rank by testability, not plausibility.
53
+ 2. The cheapest experiment to falsify the top candidate is the next step — usually a UX change, a copy test, or an onboarding tweak.
54
+ 3. If no cause is testable in under 2 weeks, the analysis is not yet sharp enough.
55
+
56
+ ### Step 6: Validate
57
+
58
+ 1. Recompute the broken rate after the experiment ships. Same cohort definition. Same window.
59
+ 2. If the rate moves but the downstream rates don't follow, you fixed a vanity step. Keep going.
60
+
61
+ ## Gotcha
62
+
63
+ - "Activation" defined as a low-friction event (signup confirmation, first login) gives you a flatter funnel that is useless for prediction. Activation must correlate with paid.
64
+ - Aggregate funnel rates that look stable can hide a 30-point drop in one channel masked by a 30-point lift in another. Always segment.
65
+ - D7 retention looks great compared to D30. Pick the metric that matches the contract length, not the one that flatters.
66
+ - Holiday weeks, deploys, marketing pushes, and refund days distort cohorts. Annotate the timeline; don't pretend a 5pp drop is real on a known holiday.
67
+
68
+ ## Do NOT
69
+
70
+ - Do NOT use industry-average benchmarks as a target. They mix B2B with B2C, freemium with high-touch — the average is meaningless.
71
+ - Do NOT compare a 1-week cohort to a 12-week trailing median; a single week's sample is too small to draw conclusions.
72
+ - Do NOT diagnose retention on a funnel without separating new-user retention from re-engaged-user retention.
73
+
74
+ ## Related Skills
75
+
76
+ **WHEN to use this**
77
+
78
+ - Where in the funnel did conversion drop?
79
+ - Compare the funnel shape between two channels.
80
+
81
+ **WHEN NOT to use this**
82
+
83
+ - Pricing tier or unit-economics question — route to [`unit-economics-modeling`](../unit-economics-modeling/SKILL.md).
84
+ - Roadmap ranking from funnel findings — route to [`rice-prioritization`](../rice-prioritization/SKILL.md).
85
+ - Setting team OKRs around the diagnosed metric — route to [`okr-tree-modeling`](../okr-tree-modeling/SKILL.md).
86
+ - Valuing the business that owns the funnel — route to [`dcf-modeling`](../dcf-modeling/SKILL.md).
87
+
88
+ ## When the agent should load this
89
+
90
+ - "Where is our funnel leaking?"
91
+ - "Why did paid conversion drop last month?"
92
+ - "Compare the funnel for paid social vs organic."
93
+ - "Diagnose this dropoff between signup and activation."
94
+ - "Is this drop real or instrumentation?"
95
+
96
+ ## Output
97
+
98
+ 1. **`funnel-table.md`** — 5-stage funnel with cohort rates, 95% CI, and 12-week trend (sparkline or compact ASCII). One row per cohort week or month.
99
+ 2. **`segment-breakdown.md`** — table of the broken stage segmented by channel · device · plan · geo. Rates with CIs. Suspect segments highlighted.
100
+ 3. **`hypothesis-list.md`** — top 3 causes for the broken segment-stage with cheapest-falsification experiment per cause and an explicit prediction for the next measurement.
@@ -32,7 +32,7 @@ Do NOT use when:
32
32
 
33
33
  - Editing project content outside the trees listed above (READMEs of
34
34
  consumer projects, application docs that follow a different policy)
35
- - Reviewing chat history files (`.agent-chat-history` is JSONL, not `.md`)
35
+ - Reviewing chat history files (`agents/.agent-chat-history` is JSONL, not `.md`)
36
36
  - Inspecting non-`.md` files — the checker rejects them with a warning
37
37
 
38
38
  ## Procedure