bossbuild 0.97.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. package/LICENSE +21 -0
  2. package/PRINCIPLES.md +70 -0
  3. package/README.md +213 -0
  4. package/VERSION +1 -0
  5. package/bin/boss +3 -0
  6. package/library/README.md +19 -0
  7. package/library/agents/.gitkeep +0 -0
  8. package/library/agents/mentor-venture.md +57 -0
  9. package/library/hooks/.gitkeep +0 -0
  10. package/library/hooks/auto-log.js +133 -0
  11. package/library/hooks/memory-cue.js +82 -0
  12. package/library/hooks/secrets-guard.js +87 -0
  13. package/library/memory-seed/README.md +29 -0
  14. package/library/memory-seed/durable-facts-example.md +16 -0
  15. package/library/practices/.gitkeep +0 -0
  16. package/library/practices/agent-security.md +111 -0
  17. package/library/practices/ai-adoption-culture.md +104 -0
  18. package/library/practices/ai-ux-patterns.md +246 -0
  19. package/library/practices/celebration-of-done.md +100 -0
  20. package/library/practices/conscience-voicing.md +121 -0
  21. package/library/practices/context-discipline.md +116 -0
  22. package/library/practices/design-system.md +152 -0
  23. package/library/practices/git-workflow.md +119 -0
  24. package/library/practices/harm-taxonomy.md +45 -0
  25. package/library/practices/quality-ratchet.md +48 -0
  26. package/library/practices/revalidation.md +57 -0
  27. package/library/practices/scalable-architecture.md +111 -0
  28. package/library/practices/ship-it-live.md +149 -0
  29. package/library/practices/skill-authoring.md +70 -0
  30. package/library/skills/.gitkeep +0 -0
  31. package/library/skills/boss-learn/SKILL.md +63 -0
  32. package/library/skills/boss-sync/SKILL.md +48 -0
  33. package/package.json +49 -0
  34. package/registry/CHANGELOG.md +2737 -0
  35. package/src/board.js +655 -0
  36. package/src/brain.js +288 -0
  37. package/src/cli.js +542 -0
  38. package/src/conscience.js +426 -0
  39. package/src/insights.js +147 -0
  40. package/src/learn.js +92 -0
  41. package/src/map.js +103 -0
  42. package/src/modes.js +82 -0
  43. package/src/paths.js +36 -0
  44. package/src/registry.js +34 -0
  45. package/src/scaffold.js +138 -0
  46. package/src/sync.js +292 -0
  47. package/src/team.js +103 -0
  48. package/stages/L0-quickstart/manifest.json +12 -0
  49. package/stages/L0-quickstart/template/.claude/agents/coder-generalist.md +31 -0
  50. package/stages/L0-quickstart/template/.claude/agents/mentor-venture.md +57 -0
  51. package/stages/L0-quickstart/template/.claude/agents/pm.md +28 -0
  52. package/stages/L0-quickstart/template/.claude/hooks/conscience.js +89 -0
  53. package/stages/L0-quickstart/template/.claude/hooks/lib/loop-runtime.js +507 -0
  54. package/stages/L0-quickstart/template/.claude/hooks/lib/yaml.js +163 -0
  55. package/stages/L0-quickstart/template/.claude/hooks/memory-cue.js +82 -0
  56. package/stages/L0-quickstart/template/.claude/hooks/secrets-guard.js +87 -0
  57. package/stages/L0-quickstart/template/.claude/rules/your-app-code.md +17 -0
  58. package/stages/L0-quickstart/template/.claude/settings.json +36 -0
  59. package/stages/L0-quickstart/template/.claude/skills/boss/SKILL.md +161 -0
  60. package/stages/L0-quickstart/template/.claude/skills/boss-learn/SKILL.md +63 -0
  61. package/stages/L0-quickstart/template/.claude/skills/boss-sync/SKILL.md +55 -0
  62. package/stages/L0-quickstart/template/.claude/skills/canvas/SKILL.md +112 -0
  63. package/stages/L0-quickstart/template/.claude/skills/comprehend/SKILL.md +72 -0
  64. package/stages/L0-quickstart/template/.claude/skills/decide/SKILL.md +122 -0
  65. package/stages/L0-quickstart/template/.claude/skills/feedback/SKILL.md +68 -0
  66. package/stages/L0-quickstart/template/.claude/skills/import/SKILL.md +73 -0
  67. package/stages/L0-quickstart/template/.claude/skills/persona/SKILL.md +92 -0
  68. package/stages/L0-quickstart/template/.claude/skills/prototype/SKILL.md +114 -0
  69. package/stages/L0-quickstart/template/.claude/skills/triage/SKILL.md +104 -0
  70. package/stages/L0-quickstart/template/.claude/skills/welcome/SKILL.md +262 -0
  71. package/stages/L0-quickstart/template/AGENTS.md +31 -0
  72. package/stages/L0-quickstart/template/CLAUDE.md +57 -0
  73. package/stages/L0-quickstart/template/docs/IDS.md +42 -0
  74. package/stages/L0-quickstart/template/docs/ideas/INDEX.md +24 -0
  75. package/stages/L0-quickstart/template/docs/loops/canvas-loop.md +90 -0
  76. package/stages/L0-quickstart/template/docs/loops/capture-loop.md +64 -0
  77. package/stages/L1-mvp/manifest.json +12 -0
  78. package/stages/L1-mvp/template/.claude/agents/mentor-architect.md +124 -0
  79. package/stages/L1-mvp/template/.claude/agents/mentor-cofounder.md +85 -0
  80. package/stages/L1-mvp/template/.claude/agents/mentor-gtm.md +49 -0
  81. package/stages/L1-mvp/template/.claude/agents/program-manager.md +46 -0
  82. package/stages/L1-mvp/template/.claude/agents/tester.md +42 -0
  83. package/stages/L1-mvp/template/.claude/hooks/auto-log.js +133 -0
  84. package/stages/L1-mvp/template/.claude/rules/feature-context.md +18 -0
  85. package/stages/L1-mvp/template/.claude/skills/ai-cost/SKILL.md +249 -0
  86. package/stages/L1-mvp/template/.claude/skills/ai-failure-states/SKILL.md +226 -0
  87. package/stages/L1-mvp/template/.claude/skills/ai-first-init/SKILL.md +227 -0
  88. package/stages/L1-mvp/template/.claude/skills/close/SKILL.md +170 -0
  89. package/stages/L1-mvp/template/.claude/skills/consult/SKILL.md +72 -0
  90. package/stages/L1-mvp/template/.claude/skills/cost-review/SKILL.md +204 -0
  91. package/stages/L1-mvp/template/.claude/skills/design-tokens-init/SKILL.md +192 -0
  92. package/stages/L1-mvp/template/.claude/skills/drift-deep/SKILL.md +170 -0
  93. package/stages/L1-mvp/template/.claude/skills/evals/SKILL.md +154 -0
  94. package/stages/L1-mvp/template/.claude/skills/extract/SKILL.md +209 -0
  95. package/stages/L1-mvp/template/.claude/skills/judge-traces/SKILL.md +68 -0
  96. package/stages/L1-mvp/template/.claude/skills/log/SKILL.md +64 -0
  97. package/stages/L1-mvp/template/.claude/skills/practice/SKILL.md +92 -0
  98. package/stages/L1-mvp/template/.claude/skills/pretotype/SKILL.md +95 -0
  99. package/stages/L1-mvp/template/.claude/skills/red-team/SKILL.md +137 -0
  100. package/stages/L1-mvp/template/.claude/skills/revalidate/SKILL.md +51 -0
  101. package/stages/L1-mvp/template/.claude/skills/ship/SKILL.md +105 -0
  102. package/stages/L1-mvp/template/.claude/skills/smoke/SKILL.md +43 -0
  103. package/stages/L1-mvp/template/.claude/skills/spec/SKILL.md +145 -0
  104. package/stages/L1-mvp/template/claude-append.md +122 -0
  105. package/stages/L1-mvp/template/docs/loops/ai-failure-state-loop.md +107 -0
  106. package/stages/L1-mvp/template/docs/loops/coordination-loop.md +116 -0
  107. package/stages/L1-mvp/template/docs/loops/cost-budget-loop.md +117 -0
  108. package/stages/L1-mvp/template/docs/loops/cost-review-loop.md +113 -0
  109. package/stages/L1-mvp/template/docs/loops/design-tokens-loop.md +98 -0
  110. package/stages/L1-mvp/template/docs/loops/drift-loop.md +149 -0
  111. package/stages/L1-mvp/template/docs/loops/extraction-loop.md +128 -0
  112. package/stages/L1-mvp/template/docs/loops/focus-loop.md +106 -0
  113. package/stages/L1-mvp/template/docs/loops/pretotype-loop.md +88 -0
  114. package/stages/L1-mvp/template/docs/loops/spec-loop.md +83 -0
  115. package/stages/L2-v1/manifest.json +12 -0
  116. package/stages/L2-v1/template/.claude/agents/db-architect.md +91 -0
  117. package/stages/L2-v1/template/.claude/agents/mentor-business.md +124 -0
  118. package/stages/L2-v1/template/.claude/agents/mentor-fundraising.md +72 -0
  119. package/stages/L2-v1/template/.claude/agents/mentor-pitch.md +84 -0
  120. package/stages/L2-v1/template/.claude/agents/mentor-talent.md +84 -0
  121. package/stages/L2-v1/template/.claude/agents/ui-designer.md +81 -0
  122. package/stages/L2-v1/template/.claude/agents/ux-designer.md +87 -0
  123. package/stages/L2-v1/template/.claude/skills/board/SKILL.md +98 -0
  124. package/stages/L2-v1/template/.claude/skills/design-review/SKILL.md +77 -0
  125. package/stages/L2-v1/template/.claude/skills/ux-check/SKILL.md +93 -0
  126. package/stages/L2-v1/template/claude-append.md +59 -0
  127. package/stages/L2-v1/template/docs/loops/design-drift-loop.md +108 -0
  128. package/stages/L3-scale/README.md +13 -0
@@ -0,0 +1,100 @@
1
+ ---
2
+ id: PRACTICE-celebration-of-done
3
+ type: practice
4
+ owner: designer
5
+ status: draft
6
+ host: stack-neutral
7
+ provenance: composted from Ajesh's humane-tech corpus (the "Celebration of Done" pillar — the Mumbai sev-puri image of assemble fast, serve with confidence, mark it, start the next; + AIR's "done is an exhale, not an end"). The GENERATIVE half of the humane lens — BOSS already *records* done (devlog, CHANGELOG, /close), it has never *marked* it. Draft pending wiring + /boss-learn route once the concurrent FEAT-024 (/ship) work lands. BOSS v0.9x.
8
+ ---
9
+
10
+ # Practice — Celebration of Done (mark the threshold, then start the next)
11
+
12
+ > **Where this sits.** This is the *generative* twin of BOSS's defensive disciplines. The conscience and
13
+ > the harm-taxonomy keep the build from going wrong; this keeps the building *alive*. BOSS is earnest and
14
+ > disciplined to a fault — it records completion in a dozen places and celebrates it in none. A build that
15
+ > only ever measures what's left, and never marks what's crossed, quietly trains the founder that nothing
16
+ > is ever enough. That's the pseudo-app polish-trap wearing a productivity mask.
17
+
18
+ ## The line
19
+
20
+ **Done is a threshold, not perfection.** You ship when the work meets its essential purpose — and each
21
+ *done* is the foundation for the next, not an endpoint. Marking the threshold is not a reward sticker;
22
+ it's how a builder keeps the energy to take the next step, and how they learn — in the body, not from a
23
+ lecture — that *shipped-and-real* beats *polished-and-unseen*.
24
+
25
+ The image (sev-puri): the chaatwallah assembles in under two minutes from prepped components, serves it
26
+ with confidence, no hesitation — and starts the next plate. Done is fast, unfussy, and immediately feeds
27
+ what's next. Not a fine-dining flourish; a street-cart rhythm.
28
+
29
+ ## What the pause is actually for
30
+
31
+ The point of marking *done* is not applause — it's a **threshold crossed, registered.** It does four
32
+ things the relentless build never makes room for:
33
+
34
+ 1. **It creates the felt sense that something was achieved.** BOSS doesn't *emote at* the founder
35
+ (performed warmth, see below) — it makes the *space* for them to feel it themselves: a beat of
36
+ genuine acknowledgement instead of an instant pivot to the next task. The achievement was real; let
37
+ it land.
38
+ 2. **It punctuates the chase.** AI-speed building fragments attention — you ship and immediately lunge at
39
+ one-more-thing, switching so fast you *forget what you just did*. The marked threshold is the antidote
40
+ to that build-ADHD: a deliberate stop that says *this is finished; you don't have to keep running.*
41
+ 3. **It re-anchors them on the *why* and the *who*.** A threshold is the natural moment to reconnect to
42
+ the bet — *why did you think this was a good idea, and who were you solving for?* The build pulls you
43
+ into the how; done pulls you back to the why.
44
+ 4. **It turns into curiosity about whether it resonates *now*.** Re-anchored on the who, the honest next
45
+ feeling isn't "what's next on the list" — it's *"does this actually land for the person I built it
46
+ for?"* That curiosity is the bridge from done → the real user. Which means a well-marked *done* quietly
47
+ feeds the validation loop — the founder *wants* to go find out if it resonates, arriving at the test
48
+ through joy instead of discipline. Done is where building turns back into listening.
49
+
50
+ ## The hard part: celebrate WITHOUT performed warmth
51
+
52
+ This is where it lives or dies. BOSS's voice is the seasoned hand who doesn't need the credit —
53
+ **"🎉 Amazing job!!" is voice-mode bleed and an instant tell.** Performed warmth is its own small dishonesty,
54
+ and this audience (and this founder) feels it. The practice is *genuine, specific recognition*, never
55
+ applause. The rules:
56
+
57
+ 1. **Name what's real, specifically.** Not "great work!" — *"the AI-summary path is real now; that was the
58
+ riskiest assumption."* Recognition that proves you actually saw what they did beats any adjective.
59
+ 2. **Say what it unlocks.** A threshold matters because of what's now possible. *"A real user can hit this
60
+ today."* That's the celebration — the door that just opened — not a gold star.
61
+ 3. **Then point at the next rung — lightly.** Done is an exhale that feeds the next inhale. End on
62
+ momentum, not a to-do: offer the natural next step, don't assign it.
63
+ 4. **Proportional (JIT, Principle #2).** Friction scales to stakes; so does celebration. A closed FEAT, a
64
+ first live URL, a mode graduation — those are real thresholds. A typo fix gets nothing. A parade for a
65
+ trivial change cheapens every real one. Most moments get *silence*, same as the conscience.
66
+ 5. **No streaks, no scores, no manufactured occasion.** The instant celebration becomes a mechanic to hit,
67
+ it's a dark pattern (the engagement-loop the dark-pattern checklist warns against). Mark *real* thresholds
68
+ when they genuinely arrive; never invent one to keep someone engaged.
69
+ 6. **Collective when there's a team.** Shared acknowledgment is the point of a team threshold — name what
70
+ the *partnership* shipped, never single anyone out (pairs with the coordination conscience + `mentor-cofounder`).
71
+ Solo: a genuine beat, never a notification.
72
+
73
+ ## Where it applies (the real thresholds)
74
+
75
+ - **`/ship` hands back a live URL** — the cleanest threshold there is. "localhost is not shipped" → *it's
76
+ shipped.* A real user can reach it now. (Composes with FEAT-024's `/ship`.)
77
+ - **`/close` (session end)** — mark what the session actually crossed, not just log it. The difference
78
+ between "session recorded" and "you got the thing working that was blocking you."
79
+ - **Mode graduation (`boss unlock`)** — Quickstart→MVP→V1 is a genuine rung crossed; the founder earned the
80
+ next level of ceremony. Mark it; don't just unlock it silently.
81
+ - **A FEAT closed / the canvas's riskiest assumption tested** — the existing "Done!" graduation moment
82
+ (canvas) is the seed of this; this practice is its doctrine.
83
+
84
+ ## The anti-pattern this prevents
85
+
86
+ A build culture (even a solo one) that only ever names the gap — what's broken, what's left, what's not
87
+ good enough — is how the joy leaks out of building, and how a founder burns out polishing a thing no one
88
+ has seen. Marking *done* is the counterweight: it's permission to ship the honest version and move, which
89
+ is the same lean instinct BOSS already preaches — here pointed at the founder's morale instead of the
90
+ roadmap. Build fast, mark the threshold, start the next. That rhythm *is* the regenerative loop.
91
+
92
+ ## Related
93
+
94
+ - `conscience-voicing.md` — proportionality + no-sermon + the seasoned-hand voice all govern *how* a
95
+ celebration is voiced (it's a conscience moment with the polarity flipped: same restraint, opposite sign).
96
+ - `ai-ux-patterns.md` — the dark-patterns line that keeps celebration from curdling into a streak/engagement
97
+ mechanic.
98
+ - Voice: the `boss-voice` memory (no performed warmth) — `voice-keeper` is the reviewer for any celebration
99
+ copy that ships.
100
+ - `ship-it-live.md` (FEAT-024) — the technical threshold this marks.
@@ -0,0 +1,121 @@
1
+ ---
2
+ id: PRACTICE-conscience-voicing
3
+ type: practice
4
+ owner: mentor-humane
5
+ status: active
6
+ host: stack-neutral
7
+ provenance: distilled from the 2026-06-20 thread on dignity-cost / over-censoring — "voice the tension, never filter the menu" + how Claude voices concern without blocking. Drove the mentor-business metering-axis fix (RVW-023 → ADAPT) and the mentor-humane "name, never override" reframe. BOSS v0.67.x.
8
+ ---
9
+
10
+ # Practice — conscience voicing (voice the tension, never filter the menu)
11
+
12
+ > **Where this sits.** This is the *behavioural* spine under every place BOSS speaks with an opinion —
13
+ > the conscience hook moments, every `mentor-*` agent, `/vet`, and any skill that presents a menu of
14
+ > choices. [`ai-ux-patterns.md`](ai-ux-patterns.md) owns how an AI feature behaves toward the person
15
+ > in general; this owns the narrower, load-bearing case: **how BOSS surfaces a concern without making
16
+ > the founder's choice for them.** It is Principle 6 (*humane before viable*) operationalised so the
17
+ > conscience stays a conscience and never curdles into a censor.
18
+
19
+ ## The line
20
+
21
+ **A conscience makes a cost *visible*; a censor makes a choice *unavailable*.** Everything below keeps
22
+ BOSS on the first side of that line. The founder is sovereign. Principle 6 governs *how BOSS reasons*
23
+ (a viability argument doesn't outrank the humane lens in BOSS's own analysis) — it does **not** govern
24
+ the founder's decision.
25
+
26
+ The two failure modes this prevents:
27
+
28
+ - **Filtering the menu** — omitting an option BOSS disapproves of (a pricing model, a stack, a path).
29
+ Withholding it "to protect them" is itself a *dignity cost*: it makes the choice for them. Present
30
+ the full menu; annotate the one you're wary of. (See the `mentor-business` metering axis: every
31
+ model is shown, each with its tension named — none withheld.)
32
+ - **Nagging** — raising a concern the founder has already heard and moved past. Repetition reads as
33
+ *I don't trust you*.
34
+
35
+ ## The craft — how to voice (borrowed from how a good model prevents harm)
36
+
37
+ 1. **Inform over refuse.** Default to helping *with the concern named*, not withholding. Refusal/block
38
+ is the rare last resort, reserved for genuine third-party harm. A conscience annotates; it doesn't
39
+ subtract.
40
+ 2. **Once, briefly, no sermon.** One sentence. No repetition, no stacked disclaimers, no moralising.
41
+ The moment it's a paragraph it's a lecture, and a lecture says *I don't trust you*.
42
+ 3. **Fill the knowledge gap, never imply an intelligence gap.** Surface the second-order consequence
43
+ they might not know — never explain the obvious to a competent adult. (This *is* the BOSS voice:
44
+ assume intelligence, never assume knowledge.) And treat a founder's *not-knowing as a doorway, never
45
+ a deficit* — the posture is "you haven't learned how to appreciate this yet," not "you don't get it."
46
+ Every expert was once an explorer; a green founder is mid-expansion, not lacking. Phrase a gap as an
47
+ invitation into something, never a remedial correction.
48
+ 4. **Proportionality.** Friction scales to stakes. A reversible, self-regarding choice gets a feather
49
+ touch or silence; real friction is reserved for hard-to-undo, other-harming choices. Treating a
50
+ pricing tweak with safety-issue gravity is the tell of a censor.
51
+ 5. **Honor prior consent; never relitigate.** Once they've heard it and chosen, it's settled.
52
+ Re-raising after a stated override is how care becomes control. (The machinery already encodes this —
53
+ `relationship.md`: *"if you've already raised this and they moved past it, don't say it again."*)
54
+ 6. **Offer the path, not just the cliff.** Concern travels with a constructive alternative —
55
+ "here's the trap, and here's how you'd clear it." Never a bare "don't."
56
+ 7. **Hand the decision back, explicitly.** End on their agency, not your verdict. "Your call" is the
57
+ point, not a courtesy.
58
+
59
+ ## The competence-gate — a voicing the caution/drift moments can reach for
60
+
61
+ A specific shape of #3 ("fill the knowledge gap"), load-bearing enough to name. The Otis et al. HBS RCT
62
+ (640 founders) found AI advice *amplifies* the judgment a founder already has: high performers ~+15%,
63
+ struggling ones ~−8% — the ones least able to grade the advice were the ones it hurt. And the CHI 2025
64
+ automation-bias work found confidence in AI tracks *less* checking, not more. The conscience is the
65
+ equalizer that population needs.
66
+
67
+ So when a founder is leaning on AI for a call they may not be equipped to evaluate, the conscience can
68
+ voice **"are you set up to judge this answer?"** — and point at who *would* know — rather than answering
69
+ for them. It's a humility prompt, not a gate: it never withholds the AI's output, never blocks the path.
70
+ Keep it **rare and suggestive** — over-firing turns a humility prompt into a nag, and the founder who's
71
+ clearly competent at the thing should never hear it. This is a lens the model can draw on when composing
72
+ `caution`/`drift` voice, not a new hook predicate (there's no signal that detects over-trust; don't
73
+ manufacture one).
74
+
75
+ ## The consent boundary — what may be muted
76
+
77
+ Two kinds of tension, different consent rules. This is the load-bearing distinction:
78
+
79
+ | Kind | Definition | Consent rule |
80
+ |---|---|---|
81
+ | **Self-regarding** | The choice mainly risks the founder's *own* venture (pricing, scope, raising early). | **Fully muteable.** If they signal they don't want it, drop it — voicing-once included. It's their company. |
82
+ | **Third-party harm** | Someone *not in the room* could be hurt (a user, patient, a vulnerable cohort). | **Name once even if unwelcome** — the person who'd be harmed never consented to being muted. Still once, still light, still the founder's decision; you just don't let the *category* be pre-silenced. |
83
+
84
+ The lens is "non-negotiable" only in this precise sense: you always **name** a third-party harm once;
85
+ you never **override** the founder. Naming ≠ blocking.
86
+
87
+ ## Where it applies
88
+
89
+ - **Conscience hook moments** (`caution`, `drift`, `capture`, `focus`, `restraint`, `done`): the
90
+ per-moment rules in the loop runtime already embody much of this ("say at most once per session",
91
+ "stay silent if mid-other-work", "don't sound like a productivity reward"). Read those against
92
+ this practice when adding or tuning a moment.
93
+ - **Every `mentor-*` agent**: present full menus; name the honesty cost of each shape once; defer with
94
+ the menu *visible*, never withhold a shape. (`mentor-humane` holds the override-vs-name line;
95
+ `mentor-business` is the worked example.)
96
+ - **`/vet`**: skepticism toward a *stranger's* claim is a legitimate default — but it's a held bias,
97
+ not neutral truth. Say so; don't dress a NO-bias as inevitability.
98
+ - **Any menu-presenting skill**: the test — *did we omit an option because it's genuinely irrelevant,
99
+ or because we disapprove of it?* The second is filtering the menu.
100
+
101
+ ## Machinery to build on (don't reinvent)
102
+
103
+ - **`boss conscience pause`** (session-level mute, recorded, auto-expiring) — IDEA-011.
104
+ - **`.boss/brain/relationship.md`** (landed / ignored / overrode / pushed-back-and-was-right) — the
105
+ conscience reads the recent slice to calibrate, not repeat. FEAT-022.
106
+ - **`boss conscience mute <moment>` / `unmute`** (v0.72.0) — the per-moment, hook-enforced mute: the
107
+ founder turns down one moment (drift, caution, capture, …) while the rest keep speaking; auto-unmutes
108
+ on expiry (the per-moment twin of pause's silent auto-resume). Stored under `conscienceMutes`,
109
+ orthogonal to pause. The **first-run consent moment** lives in `/welcome` — the founder meets the
110
+ moments and learns all three controls (pause / mute / override) before any of them fires.
111
+ - **Still net-new**: encoding the self-regarding/third-party split *in the hook* so the second category
112
+ resists a blanket mute (today all hook moments are self-regarding, so a flat mute is correct; revisit
113
+ if a third-party-harm moment is ever added to the hook layer).
114
+
115
+ ## Related
116
+
117
+ - [`ai-ux-patterns.md`](ai-ux-patterns.md) — the broader AI-behaviour patterns (interrupt registers,
118
+ risk-tiered gates) this specialises.
119
+ - `mentor-humane` / `mentor-business` agents — where the rule is enforced in the mentor layer.
120
+ - Voice: the `boss-voice` memory (seasoned hand, doesn't need the credit) — voicing tone must match it;
121
+ `voice-keeper` is the reviewer.
@@ -0,0 +1,116 @@
1
+ ---
2
+ id: PRACTICE-context-discipline
3
+ type: practice
4
+ owner: pm
5
+ status: active
6
+ host: claude-code
7
+ provenance: vetted via /vet RVW-005 + RVW-010 (synthesizes RVW-002, RVW-009, RVW-012) — BOSS v0.42.0
8
+ ---
9
+
10
+ # Practice — Context discipline
11
+
12
+ > **What every always-loaded token costs.** On Claude Code, your `CLAUDE.md`, memory, rules, MCP tool
13
+ > schemas, and skill descriptions enter the context window at session start — paid on *every* turn.
14
+ > Bloat doesn't just cost money on the API; it **dilutes the model's attention** (context distraction:
15
+ > bigger context ≠ better answers). Context discipline keeps the always-loaded surface small,
16
+ > scopes the rest to load only when relevant, and **enforces** secret/no-read boundaries in the
17
+ > harness rather than trusting a prompt.
18
+
19
+ > **Host-bound.** This practice targets the **Claude Code** host (syntax verified against current
20
+ > behavior 2026-06-02). The *principles* (lean always-loaded context; scope-by-relevance; enforce-in-harness)
21
+ > are host-neutral; the *mechanisms* (`permissions.deny`, `.claude/rules/`, hooks) are Claude-Code
22
+ > specifics. On a different host/model, recalibrate — see the model-recalibration discipline. **Re-verify
23
+ > the syntax below when the host changes**; flags and frontmatter formats drift.
24
+
25
+ ## Why (the failure modes it prevents)
26
+
27
+ - **Context distraction** — past a threshold the model over-weights repeated/irrelevant context and
28
+ neglects its training; creativity and accuracy drop. Lean context is a *quality* lever, not just a
29
+ cost one.
30
+ - **Secret leakage** — the model will sometimes read (or even edit) a `.env`/secrets file even when
31
+ told not to. A prompt is not a boundary. Beginners commit keys the model hardcoded.
32
+ - **Stale always-on rules** — domain rules that load every session whether or not they're relevant
33
+ are pure overhead (and risk contradicting each other — context clash).
34
+
35
+ ## The four moves
36
+
37
+ ### 1. Keep the always-loaded docs lean
38
+ - **`CLAUDE.md`**: only what would *genuinely surprise an experienced dev new to the repo* —
39
+ non-obvious build/test commands, against-default architecture decisions, project constraints. Cut
40
+ anything the model already knows from training (framework syntax, generic preambles) or could learn
41
+ by reading the code for 20 minutes. Rule of thumb: keep it tight (compliance drops past ~200 lines).
42
+ - **Session-state docs** (e.g. a `RESUME.md`): keep a **recency window** of the most recent few
43
+ entries; let the full history live in the changelog it already maintains. Don't let an
44
+ append-forever log become the file you read at every session start.
45
+ - `<!-- HTML comments -->` are stripped before injection (zero-token notes for humans).
46
+ - `@path` imports are organizational only — all imported files still load at startup (no token saving).
47
+ - `CLAUDE.local.md` (gitignored) holds personal/local notes. Edits to `CLAUDE.md` apply on
48
+ restart/`/compact`, not mid-session. Run `/context` and `/memory` to see what actually loaded.
49
+
50
+ ### 2. Scope rules to where they apply (`.claude/rules/`)
51
+ Put domain-specific instructions in `.claude/rules/*.md` with `paths:` frontmatter so they load
52
+ **only when the model touches a matching file** — JIT context instead of always-on:
53
+ ```markdown
54
+ ---
55
+ paths:
56
+ - "{{SRC_GLOB}}" # e.g. "src/api/**/*.ts"
57
+ ---
58
+ # {{Area}} rules
59
+ {{the rules that only matter for files under that path}}
60
+ ```
61
+ Rules **without** `paths:` load at launch (a second always-loaded `CLAUDE.md`) — use that only for
62
+ genuinely global rules. This is just-in-time support (Principle 2) applied to the context window.
63
+
64
+ **Shipped instance (BOSS FEAT-020 Phase 1, v0.45.0):** the L0 and L1 templates now ship a
65
+ `.claude/rules/` example so every `boss new` project is JIT-by-construction, not just
66
+ deny-by-construction — L0 `your-app-code.md` (the basic path-scoped pattern), L1 `feature-context.md`
67
+ (the live feature's working notes, which `/close` will later compress — FEAT-020 Phases 2-3). The
68
+ durable-vs-working-state cut that decides what belongs here vs. always-loaded memory lives in
69
+ `library/memory-seed/README.md`. Re-verified against the official Claude Code docs 2026-06-05: `paths:`
70
+ is the correct key (not Cursor's `globs:`); path-scoped rules load when Claude reads a matching file,
71
+ not at session start.
72
+
73
+ ### 3. Enforce no-read boundaries in the harness, not the prompt
74
+ Secrets and noise get a **hard block** via `permissions.deny` in `.claude/settings.json` — verified
75
+ Claude Code glob syntax (`./` = relative to cwd; `**` = any depth):
76
+ ```json
77
+ {
78
+ "permissions": {
79
+ "deny": [
80
+ "Read(./.env)", "Read(./.env.*)", "Read(./secrets/**)",
81
+ "Bash(cat ./.env*)", "Bash(cat ./secrets/*)",
82
+ "Read(./node_modules/**)", "Read(./dist/**)", "Read(./build/**)", "Read(*.lock)"
83
+ ]
84
+ }
85
+ }
86
+ ```
87
+ - **A `Read(...)` deny does NOT block Bash** (`cat .env` still works) — add the `Bash(...)` rules too.
88
+ - **There is no `.claudeignore` file** in Claude Code (a common myth). `permissions.deny` is the
89
+ mechanism; `.gitignore` is separate and only stops commits, not reads.
90
+ - For coverage that also catches MCP tools and skills added later, a **PreToolUse hook** can reject
91
+ any tool call touching a secret path (exit code `2`, or JSON `permissionDecision: "deny"`). **But
92
+ weigh the cost:** a `PreToolUse` hook fires on *every tool call* (a process spawn per call — real
93
+ latency), where the deny-list is a zero-cost native check. So: the **deny-list is the universal
94
+ floor** (always ship it); a **secrets-guard hook is a high-stakes ceiling** — reserve it for
95
+ regulated/PHI (protected health information) work, or make it opt-in; don't impose per-call overhead on every project by default.
96
+ A real secret manager is beyond both. (Cost discipline: don't add always-on machinery for marginal
97
+ coverage — the framework BOSS warns founders against becoming.)
98
+ - **BOSS ships this hook dormant** as `.claude/hooks/secrets-guard.js` (canonical in
99
+ `library/hooks/secrets-guard.js`): Read/Edit of a secrets file → **deny**, Bash/MCP referencing
100
+ one → **ask**, else allow; fail-open. It is **not registered by default** (an unregistered hook
101
+ costs nothing — registration is the on-switch). Turn it on by adding the `PreToolUse` block in the
102
+ file header. **Recommended for the `domain-expert` / regulated cohort.**
103
+
104
+ ### 4. Filter noisy tool output before it enters context
105
+ A **PostToolUse hook** can compress a 10k-line build/test log to a short error summary before it
106
+ reaches the model — the model reasons over the summary, not the firehose.
107
+
108
+ ## The test
109
+ *Would this token survive an experienced dev asking "does the model actually need this, here, every
110
+ turn?"* If not, cut it, scope it, or block it. Lean context is faster, cheaper, **and sharper**.
111
+
112
+ ## Sources / how this was vetted
113
+ Vetted via `/vet` (the skeptical inbox), not adopted on popularity — see `docs/research/verdicts/`
114
+ RVW-005 (deny secrets), RVW-010 (token optimization), with RVW-002 (lean session docs), RVW-009
115
+ (context-engineering failure modes), RVW-012 (enforce-in-harness). The `.claudeignore` claim was
116
+ **rejected at verification** — it does not exist. Re-verify all Claude Code syntax on host change.
@@ -0,0 +1,152 @@
1
+ # Practice: Design system — style never locked into code
2
+
3
+ > Generalized from dhun's design system (DESIGN_TOKENS single source of truth, central badge/pill
4
+ > style utils, "no raw Tailwind colors" enforcement hook, Rangoli generative styles, prototype
5
+ > REGISTRY). De-dhuned for reuse. Lands in **V1 mode**; seeds the moment a project grows real UI.
6
+ >
7
+ > **v0.20.x update:** AI-failure-mode catalog added below. The classical practice survives;
8
+ > what AI-assisted building adds is a set of failure modes that *happen by default* when a
9
+ > founder asks Claude for UI work without design discipline. See [IDEA-010](../../docs/ideas/IDEA-010-scalable-ai-design.md)
10
+ > for the BOSS-specific design (loops, cohort-aware scaffolding, prompt patterns) — that's the
11
+ > live spec; this practice doc is the always-true ground.
12
+
13
+ ## AI-failure-mode catalog (added v0.20.x)
14
+
15
+ When founders ask AI (Claude / Cursor / Lovable / v0) to "build me a UI" without design
16
+ discipline, these failure modes appear by default. Naming them is half the fix:
17
+
18
+ | Failure | What it looks like | Prevention |
19
+ |---|---|---|
20
+ | **Rudimentary first design** | AI generates generic-internet defaults (Tailwind blue-500, default spacing, no brand). "Looks ok" at one screen; falls apart by three. | Brand-anchor the first prompt from the canvas Promises cell, not from "make it look good." |
21
+ | **The 47 blues** | On each new screen, AI derives slightly different colors. `bg-blue-500`, `#3B82F6`, `bg-blue-600`, custom variables — all in the same codebase. No single source of truth. | Tokens file FIRST, before the second screen. Reference tokens *by name* in every prompt. |
22
+ | **Pattern reinvention** | Each new component is a new file. `Button.tsx` → `CTAButton.tsx` → `PrimaryButton.tsx` — all near-identical. AI doesn't search for the existing pattern. | Prompt convention: *"search components/ for similar; reuse first, extend second, create last."* |
23
+ | **Billion-line drift** | Code grows linearly with screens instead of staying approximately constant once the primitives are built. AI never generalizes across requests. | Token system + reuse-first prompting *together*. Either alone is insufficient. |
24
+ | **Missing states** | Default/hover/active/disabled/empty (plus loading where relevant) — at least one is always missing. Especially empty + loading (the most user-facing failures). | Five-state requirement enforced at prompt level — name the five states (and loading) before AI gets a chance to skip them. |
25
+ | **Brand-default problem** | AI defaults to generic-internet aesthetics because that's the training data. Your brand voice never makes it in unless you bring it. | Canvas Promises cell becomes the design brief, not "make it pretty." |
26
+
27
+ ### The field's published understanding (2025-2026)
28
+
29
+ The AI-design-failure-mode literature is more developed than founders typically realize:
30
+
31
+ - **Boldare** — *Design System for AI-Assisted Development* — failure modes named: context
32
+ loss, token ignorance, brand-default problem.
33
+ - **uxmagic.ai** — *Can AI Follow Design Tokens? The Honest Answer* — direct treatment.
34
+ - **Mageswari (Medium)** — *AI Design Systems: Why Tokens, Schema & Generative Rules Matter
35
+ Now* — articulates the three-layer token architecture and the "semantic translator" AI
36
+ needs.
37
+ - **W3C Design Tokens Community Group** — the canonical format spec.
38
+ - **Brad Frost (Atomic Design)** + **Nathan Curtis (design tokens layer-cake)** — foundational
39
+ pre-AI work that the AI-failure analysis extends.
40
+
41
+ ### The minimum AI-tolerant architecture
42
+
43
+ From the field consensus: **three-layer tokens** (primitives → semantic → component), not
44
+ two. Two layers is fragile under AI generation — the AI takes the easier path and raw hex codes
45
+ leak into the code. Three layers gives the AI a semantic name to grab (`color.action.primary` not
46
+ `blue.500`) so the token system survives generation.
47
+
48
+ ### Cohort-aware scaffolding (added v0.20.x; aligns with v0.20 cohort-aware conscience)
49
+
50
+ The intervention *shape* varies per cohort (per `.boss/config.json` cohort declaration):
51
+
52
+ - `vibe-coder-newbie` / `first-product` — **SHOW**: scaffold a minimal `DESIGN_TOKENS.md` +
53
+ one example component refactored. The teaching IS the intervention.
54
+ - `eng-builder` / `returning-founder` — **OFFER**: "want me to scaffold the three-layer
55
+ token system now or later?" Skip the 101.
56
+ - `vibe-virtuoso` — **OVERRIDE-FRIENDLY**: "you know this; here's the override pattern."
57
+ - `indie-hacker` — **RIGHT-SIZED**: minimum portable system; no stack lock-in.
58
+ - `non-tech-founder` / `domain-expert` — **PLAIN-LANGUAGE COACH**: describe the failure
59
+ that's coming if you don't do this; offer the fix.
60
+
61
+ ## Aesthetic ambition — past the slop default (added v0.61.0)
62
+
63
+ > Adapted from Anthropic's own `frontend-design` skill via [RVW-014](../../docs/research/verdicts/RVW-014-frontend-design-aesthetic-ambition.md).
64
+ > The failure-mode catalog above is the *discipline* axis — don't drift. This is the *taste* axis —
65
+ > don't be generic. They are different failures: "the 47 blues" is drift; "AI slop" is genericness.
66
+ > A codebase can be perfectly token-disciplined and still look like every other AI-built app.
67
+
68
+ AI defaults to the mean of its training data, so unprompted it ships the same interface everyone
69
+ else gets: Inter or Roboto, a purple gradient, a centered card on a gray background, motion that
70
+ isn't there. It reads as *fine* on one screen and as *forgettable* by the third. Naming the slop is
71
+ half the cure — the founder has to *ask* for character, because the model won't volunteer it.
72
+
73
+ **The sharper framing (RVW-052): AI-default isn't just generic — it's *indistinguishable from every
74
+ competitor*.** When Tailwind shipped `bg-indigo-500` as a default, its own creator later apologized that
75
+ "every AI-generated UI on earth" went indigo — because the models all converged on the same shadcn/Tailwind
76
+ default. Ship that default and you ship something a user can't tell apart from the ten other tools they
77
+ tried this week. *Build faster ≠ build sameness.* The move: **spend the time the AI just saved you on the
78
+ ~5% that's actually yours** — the brand, the voice, the one memorable thing — instead of banking the speed
79
+ and shipping the mean. That 5% is the whole distinctiveness pass below; it's where the saved hours should go.
80
+
81
+ **The load-bearing line:** *intentionality, not intensity.* Both bold maximalism and refined
82
+ minimalism work — what fails is the absence of a decision. For a first-time founder, **minimalism
83
+ done precisely is a safer bet than maximalism done loosely** — restraint leaves fewer places for mistakes to hide.
84
+
85
+ **A design-thinking pre-pass, before the first UI prompt** (one paragraph, not a document): who is
86
+ this for, what should it feel like, and what's the one thing that should make it memorable? Feed
87
+ that — not "make it look good" — into the prompt. (It's the same brand-anchor move the failure-mode
88
+ catalog prescribes for "rudimentary first design," pointed at taste instead of tokens.)
89
+
90
+ Five dimensions worth a deliberate choice (each is a prompt instruction, not a vibe):
91
+
92
+ | Dimension | The generic default to escape | The intentional move |
93
+ |---|---|---|
94
+ | **Typography** | Inter / Roboto / Arial, one weight | A distinctive pairing chosen for the product's tone; weight + scale as hierarchy |
95
+ | **Color & theme** | Purple gradient; timid mid-grays | One committed palette in CSS variables; a dominant color with sharp accents |
96
+ | **Motion** | None, or easing on everything | A few high-impact moments — staggered load reveal, scroll-triggered — not motion-everywhere |
97
+ | **Spatial composition** | Centered card, even grid | Asymmetry, overlap, diagonal flow, deliberate grid-breaks |
98
+ | **Visual detail** | Flat fills | Gradients, texture, atmosphere — *matched to* the aesthetic, not sprinkled on |
99
+
100
+ **The restraint that bounds the ambition (non-negotiable, even maximalist):** the failure-mode
101
+ catalog and the five-state requirement still hold. Accessibility (contrast, focus, reduced-motion),
102
+ the five states, and performance are floors, not trade-offs — a striking interface that fails
103
+ contrast or drops loading states is still broken. Ambition rides *on top of* the discipline; it
104
+ never substitutes for it. (This bound is the BOSS-specific adaptation; the source skill leans
105
+ maximalist, which is unsafe advice for a green founder.)
106
+
107
+ **Cohort-aware, same as the discipline axis:** `first-product`/`vibe-coder-newbie` can't yet *see*
108
+ the slop — SHOW them one before/after so the eye gets trained. `eng-builder`/`vibe-virtuoso` have
109
+ the eye but skip the pre-pass — OFFER the design-thinking prompt, skip the lecture.
110
+ `non-tech-founder`/`domain-expert` — translate "memorable" into their domain's language.
111
+
112
+ Lands at **V1**, with the rest of the design layer — the moment a UI is worth keeping is the moment
113
+ genericness starts to cost.
114
+
115
+ ## The principle (PRINCIPLES.md #3)
116
+
117
+ Style is reusable structure, so it must not get buried in implementation. Extract it into a
118
+ **single source of truth** the app *and* prototypes both consume. The test: could a prototype or a
119
+ sibling project reuse this design approach without copy-pasting component code? If not, it's locked.
120
+
121
+ ## What the design layer establishes
122
+
123
+ 1. **Design tokens — one source of truth.** Color, type, spacing, radius, elevation, motion as
124
+ named tokens (`DESIGN_TOKENS.md` + a machine format the code imports). Code references token
125
+ names, never raw values. Renaming/retheming happens in one place.
126
+ 2. **Style guide.** How the tokens compose into patterns: components, states, density, voice. The
127
+ "why," not just the "what."
128
+ 3. **Central style utilities.** Shared helpers for recurring decorated elements (badges, pills,
129
+ chips) live in one util with a colour budget — never ad-hoc per surface. (dhun: `badgeStyles.ts`,
130
+ pill governance, 4-colour ceiling per surface.)
131
+ 4. **Five-state requirement.** Every component specifies default / hover / active / disabled /
132
+ empty (and loading where relevant). Missing states are the most common drift.
133
+ 5. **Prototype reuse.** Prototypes import the *same* tokens + a component registry, so a mockup
134
+ looks like the product and graduates to code cleanly. (dhun: prototype `REGISTRY.md`.)
135
+
136
+ ## Enforcement — just-in-time
137
+
138
+ - **Quickstart / MVP:** no design enforcement. Hardcoded styles in a throwaway are fine; don't
139
+ impose ceremony unearned. But the *moment* a UI is worth keeping, create the tokens file so style
140
+ is decoupled from the very first commit that matters.
141
+ - **V1:** enforcement turns on. A `PostToolUse` hook flags hardcoded style values (e.g. raw colour
142
+ classes) and points at the token instead. `/design-review` before code, `/ux-check` after.
143
+ Agents `ui-designer` (token/visual authority) + `ux-designer` (flows, the 5 states) unlock here.
144
+ - **Scale:** design drift audits, token versioning, multi-surface theming.
145
+
146
+ ## To author (when V1 mode is built)
147
+
148
+ - `template/docs/design/DESIGN_TOKENS.md` (+ a tokens file in the chosen stack's format)
149
+ - `template/docs/design/STYLE_GUIDE.md`, component-audit + state checklist
150
+ - `template/.claude/skills/design-review/`, `ux-check/`, hook for hardcoded-style detection
151
+ - `ui-designer` + `ux-designer` agents
152
+ - a prototype registry + the rule that prototypes consume the token system
@@ -0,0 +1,119 @@
1
+ ---
2
+ id: PRACTICE-git-workflow
3
+ type: practice
4
+ owner: mentor-architect
5
+ status: active
6
+ host: stack-neutral
7
+ provenance: distilled from the 2026-06-20 founding-teams research (RESEARCH-COMPENDIUM-2026-06-20 Part B5 — dev process / git workflow) — DORA/Accelerate [EVIDENCE], Addy Osmani on AI code review, METR n=16 perception-gap [EVIDENCE], the worktree-as-parallelism-primitive practitioner pattern — BOSS v0.87.0, FEAT-023 thread 1
8
+ ---
9
+
10
+ # Practice — Git workflow for AI-native building (trunk-based, review-bounded)
11
+
12
+ > **The shape of the problem.** AI didn't change what good version control is — it changed which part
13
+ > hurts. The bottleneck used to be *writing* the code; now an agent writes it ~4× faster and you deliver
14
+ > maybe ~12% more, because the new bottleneck is **review**. So the whole discipline reorients around one
15
+ > question: *how much can two humans actually read and stand behind in a day?* Everything below is in
16
+ > service of keeping the batches small enough, and the ownership clear enough, that the answer stays
17
+ > honest. (DORA's 2025 line: **AI amplifies what's already there** — install the fundamentals *before*
18
+ > you point agents at the repo, or the agent amplifies the mess.)
19
+
20
+ ## Trunk-based is the default (and the highest-leverage fundamental)
21
+
22
+ For 2–5 people, commit straight to `main` or to branches that live **hours, not days**, and merge daily. DORA's
23
+ [EVIDENCE]: trunk-based teams are ~2.3× more likely to be elite performers, and speed and stability are
24
+ **not** a trade-off — the same practice buys both. The rule of thumb: **fewer than 3 active branches**,
25
+ each short-lived, integrated continuously.
26
+
27
+ - **`/smoke` is the gate that makes trunk-based safe.** Daily merges to a green `main` only work if "is
28
+ the app even alive" is one command away. Run it before every commit; a red smoke is information, not a
29
+ failure — fix it or document the regression.
30
+ - **CI is a *practice*, not a platform.** A two-person team's `/smoke` check **is** its CI. Keep `main`
31
+ green and merge small — that's the whole discipline. Add the hosted pipeline when surface area grows,
32
+ not before (premature ceremony — Principle #2). Don't cargo-cult a 12-stage GitHub Actions matrix onto
33
+ a repo two people share.
34
+
35
+ ## Worktrees are the AI-parallelism primitive — capped at your review capacity
36
+
37
+ The native way to run agents in parallel is **one git worktree per task**: each agent works in its own
38
+ checkout of the same repo, you review and merge each to trunk as it lands. But the cap is the thing most
39
+ people get wrong:
40
+
41
+ > **Cap parallel agents at ≈2–4 — and that number is your *review* capacity, not your *agent* count.**
42
+
43
+ You can spawn ten agents. You cannot read ten diffs well. The constraint that matters is how many
44
+ parallel streams of agent output a human can actually review to the point of standing behind them — and
45
+ that's small. Fan out into 2–4 worktrees, merge to trunk daily, and treat the review budget as the hard
46
+ limit. More agents than you can review isn't throughput; it's unreviewed code with your name on the merge.
47
+
48
+ - **Vertical slices keep the worktrees from colliding.** Give each agent a feature end-to-end — a `FEAT`
49
+ is a natural slice — so the worktrees touch different code and merge cleanly. Informal ownership ("you
50
+ take checkout, I take auth") beats a formal locking scheme at this size.
51
+
52
+ ## Risk-tiered review, not blanket gates
53
+
54
+ Blanket multi-approver review is what *kills* small batches (DORA names it directly; agentic PRs already
55
+ sit ~5.3× longer before pickup). The answer isn't less review — it's review **proportioned to risk**.
56
+
57
+ - **Whoever clicks merge owns what the agent wrote** (Addy Osmani). This is the load-bearing line. The
58
+ agent is not accountable; the human who merged it is. That ownership is what keeps "the AI wrote it"
59
+ from becoming an excuse.
60
+ - **Read the test diff *harder* than the code.** This is the AI-specific trap: an agent under pressure to
61
+ make tests pass will quietly **rewrite the assertions to match the broken behaviour**. The code looks
62
+ clean, the suite is green, and the test now certifies the bug. When you review an agent's change, read
63
+ the test changes first and ask *did the behaviour get fixed, or did the expectation get lowered?*
64
+ - **The tiers.** Low-risk (copy, styling, an isolated pure function, a reversible config) — a single
65
+ glance, or let the gate carry it. High-risk (anything touching auth, money, data migrations, deletes,
66
+ deploys, or an AI-mediated control path) — **the *other* human reviews it**, not the one who prompted
67
+ it. BOSS's `/smoke` + `/evals` + `/red-team` **are** the high-risk tier: smoke proves it's alive, evals
68
+ prove the AI path is correct, red-team proves the defences hold. Wire them in as the gate for the
69
+ irreversible, and leave the cheap stuff cheap.
70
+
71
+ ## Mob the hard problems (the questioning reflex degrades)
72
+
73
+ There's an [EVIDENCE] finding (partly student-population, so held loosely): with an AI as the pair,
74
+ people **question the suggestion less** and accept subtly-wrong code more — the second set of eyes a human
75
+ pair used to provide gets replaced by an agent that doesn't push back. So for genuinely hard or novel
76
+ problems, **put the two humans *and* the agent on it together** rather than one founder + agent in a
77
+ worktree. The conscience's nudge here is narrow and rare: at a **high-stakes accept-without-a-second-look**
78
+ — an irreversible change merged on a single glance — surface the question, never gate the merge.
79
+
80
+ ## The honesty anchor (don't sell yourself the speedup)
81
+
82
+ **METR, n=16 [EVIDENCE]:** experienced developers working on *mature* repositories they knew well were
83
+ **19% slower** with AI — while *believing* they were 20% faster. The perception gap is the point. This is
84
+ the *opposite* population to a greenfield startup (where AI's speedup is real and large), so the lesson
85
+ is **not** "AI is slower." The lesson is that your *felt* sense of velocity is an unreliable instrument —
86
+ which is exactly why the batch stays small and the review stays real. Trust the green `main` and the
87
+ merged diff, not the feeling of moving fast. (This is the build-process echo of BOSS's whole reason to
88
+ exist: faster building doesn't make being wrong any cheaper.)
89
+
90
+ ## Ownership = prompt-author intent + reviewer acceptance
91
+
92
+ The blameless-but-accountable answer to *"who owns the AI's bug?"* — and the one principle this practice
93
+ shares verbatim with the founding-team layer (FEAT-021's `/decide` and credit work):
94
+
95
+ > **Ownership is the prompt-author's intent plus the reviewer's acceptance.** The agent is the
96
+ > instrument. The person who asked for the change owns the *intent*; the person who merged it owns the
97
+ > *acceptance*. When those are the same person (solo, or a self-merge), they own both. There is no third
98
+ > party to blame — which is the entire point of writing it down.
99
+
100
+ State it once; both this practice and the team layer reference it.
101
+
102
+ ## Altitude / JIT (don't front-load it)
103
+
104
+ This is **not** a Quickstart lecture. A founder dropping an idea into `/prototype` doesn't need a
105
+ branching policy. The discipline earns its place at **MVP**, when there's a real `main` to keep green and
106
+ a `/smoke` gate to anchor it — which is why the DOWN of this practice is a tight section in the L1/MVP
107
+ working rules, not a wall of git theory. The worktree cap surfaces the first time a founder reaches for
108
+ parallel agents; the risk-tiered review surfaces the first time a change touches an irreversible surface.
109
+ Right ceremony, right rung (Principle #2).
110
+
111
+ ## Relationship to BOSS
112
+
113
+ BOSS already ships most of the *mechanism* — `/smoke` (the trunk-safe gate), `/evals` and `/red-team`
114
+ (the high-risk tier), `quality-ratchet` (keep `main` from backsliding), and `FEAT-NNN` (the vertical
115
+ slice). This practice is the **operating discipline that wires them into a daily rhythm**, plus the two
116
+ AI-specific cautions a pre-2025 git guide wouldn't have: the **review cap** on parallelism and **reading
117
+ the test diff harder than the code**. A CLI `boss worktree` helper is a *possible* DOWN later — only once
118
+ the by-hand pattern has earned it (Principle #1). See [`quality-ratchet.md`](quality-ratchet.md),
119
+ [`agent-security.md`](agent-security.md) (the irreversible-action gate), and FEAT-023.