bossbuild 0.97.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. package/LICENSE +21 -0
  2. package/PRINCIPLES.md +70 -0
  3. package/README.md +213 -0
  4. package/VERSION +1 -0
  5. package/bin/boss +3 -0
  6. package/library/README.md +19 -0
  7. package/library/agents/.gitkeep +0 -0
  8. package/library/agents/mentor-venture.md +57 -0
  9. package/library/hooks/.gitkeep +0 -0
  10. package/library/hooks/auto-log.js +133 -0
  11. package/library/hooks/memory-cue.js +82 -0
  12. package/library/hooks/secrets-guard.js +87 -0
  13. package/library/memory-seed/README.md +29 -0
  14. package/library/memory-seed/durable-facts-example.md +16 -0
  15. package/library/practices/.gitkeep +0 -0
  16. package/library/practices/agent-security.md +111 -0
  17. package/library/practices/ai-adoption-culture.md +104 -0
  18. package/library/practices/ai-ux-patterns.md +246 -0
  19. package/library/practices/celebration-of-done.md +100 -0
  20. package/library/practices/conscience-voicing.md +121 -0
  21. package/library/practices/context-discipline.md +116 -0
  22. package/library/practices/design-system.md +152 -0
  23. package/library/practices/git-workflow.md +119 -0
  24. package/library/practices/harm-taxonomy.md +45 -0
  25. package/library/practices/quality-ratchet.md +48 -0
  26. package/library/practices/revalidation.md +57 -0
  27. package/library/practices/scalable-architecture.md +111 -0
  28. package/library/practices/ship-it-live.md +149 -0
  29. package/library/practices/skill-authoring.md +70 -0
  30. package/library/skills/.gitkeep +0 -0
  31. package/library/skills/boss-learn/SKILL.md +63 -0
  32. package/library/skills/boss-sync/SKILL.md +48 -0
  33. package/package.json +49 -0
  34. package/registry/CHANGELOG.md +2737 -0
  35. package/src/board.js +655 -0
  36. package/src/brain.js +288 -0
  37. package/src/cli.js +542 -0
  38. package/src/conscience.js +426 -0
  39. package/src/insights.js +147 -0
  40. package/src/learn.js +92 -0
  41. package/src/map.js +103 -0
  42. package/src/modes.js +82 -0
  43. package/src/paths.js +36 -0
  44. package/src/registry.js +34 -0
  45. package/src/scaffold.js +138 -0
  46. package/src/sync.js +292 -0
  47. package/src/team.js +103 -0
  48. package/stages/L0-quickstart/manifest.json +12 -0
  49. package/stages/L0-quickstart/template/.claude/agents/coder-generalist.md +31 -0
  50. package/stages/L0-quickstart/template/.claude/agents/mentor-venture.md +57 -0
  51. package/stages/L0-quickstart/template/.claude/agents/pm.md +28 -0
  52. package/stages/L0-quickstart/template/.claude/hooks/conscience.js +89 -0
  53. package/stages/L0-quickstart/template/.claude/hooks/lib/loop-runtime.js +507 -0
  54. package/stages/L0-quickstart/template/.claude/hooks/lib/yaml.js +163 -0
  55. package/stages/L0-quickstart/template/.claude/hooks/memory-cue.js +82 -0
  56. package/stages/L0-quickstart/template/.claude/hooks/secrets-guard.js +87 -0
  57. package/stages/L0-quickstart/template/.claude/rules/your-app-code.md +17 -0
  58. package/stages/L0-quickstart/template/.claude/settings.json +36 -0
  59. package/stages/L0-quickstart/template/.claude/skills/boss/SKILL.md +161 -0
  60. package/stages/L0-quickstart/template/.claude/skills/boss-learn/SKILL.md +63 -0
  61. package/stages/L0-quickstart/template/.claude/skills/boss-sync/SKILL.md +55 -0
  62. package/stages/L0-quickstart/template/.claude/skills/canvas/SKILL.md +112 -0
  63. package/stages/L0-quickstart/template/.claude/skills/comprehend/SKILL.md +72 -0
  64. package/stages/L0-quickstart/template/.claude/skills/decide/SKILL.md +122 -0
  65. package/stages/L0-quickstart/template/.claude/skills/feedback/SKILL.md +68 -0
  66. package/stages/L0-quickstart/template/.claude/skills/import/SKILL.md +73 -0
  67. package/stages/L0-quickstart/template/.claude/skills/persona/SKILL.md +92 -0
  68. package/stages/L0-quickstart/template/.claude/skills/prototype/SKILL.md +114 -0
  69. package/stages/L0-quickstart/template/.claude/skills/triage/SKILL.md +104 -0
  70. package/stages/L0-quickstart/template/.claude/skills/welcome/SKILL.md +262 -0
  71. package/stages/L0-quickstart/template/AGENTS.md +31 -0
  72. package/stages/L0-quickstart/template/CLAUDE.md +57 -0
  73. package/stages/L0-quickstart/template/docs/IDS.md +42 -0
  74. package/stages/L0-quickstart/template/docs/ideas/INDEX.md +24 -0
  75. package/stages/L0-quickstart/template/docs/loops/canvas-loop.md +90 -0
  76. package/stages/L0-quickstart/template/docs/loops/capture-loop.md +64 -0
  77. package/stages/L1-mvp/manifest.json +12 -0
  78. package/stages/L1-mvp/template/.claude/agents/mentor-architect.md +124 -0
  79. package/stages/L1-mvp/template/.claude/agents/mentor-cofounder.md +85 -0
  80. package/stages/L1-mvp/template/.claude/agents/mentor-gtm.md +49 -0
  81. package/stages/L1-mvp/template/.claude/agents/program-manager.md +46 -0
  82. package/stages/L1-mvp/template/.claude/agents/tester.md +42 -0
  83. package/stages/L1-mvp/template/.claude/hooks/auto-log.js +133 -0
  84. package/stages/L1-mvp/template/.claude/rules/feature-context.md +18 -0
  85. package/stages/L1-mvp/template/.claude/skills/ai-cost/SKILL.md +249 -0
  86. package/stages/L1-mvp/template/.claude/skills/ai-failure-states/SKILL.md +226 -0
  87. package/stages/L1-mvp/template/.claude/skills/ai-first-init/SKILL.md +227 -0
  88. package/stages/L1-mvp/template/.claude/skills/close/SKILL.md +170 -0
  89. package/stages/L1-mvp/template/.claude/skills/consult/SKILL.md +72 -0
  90. package/stages/L1-mvp/template/.claude/skills/cost-review/SKILL.md +204 -0
  91. package/stages/L1-mvp/template/.claude/skills/design-tokens-init/SKILL.md +192 -0
  92. package/stages/L1-mvp/template/.claude/skills/drift-deep/SKILL.md +170 -0
  93. package/stages/L1-mvp/template/.claude/skills/evals/SKILL.md +154 -0
  94. package/stages/L1-mvp/template/.claude/skills/extract/SKILL.md +209 -0
  95. package/stages/L1-mvp/template/.claude/skills/judge-traces/SKILL.md +68 -0
  96. package/stages/L1-mvp/template/.claude/skills/log/SKILL.md +64 -0
  97. package/stages/L1-mvp/template/.claude/skills/practice/SKILL.md +92 -0
  98. package/stages/L1-mvp/template/.claude/skills/pretotype/SKILL.md +95 -0
  99. package/stages/L1-mvp/template/.claude/skills/red-team/SKILL.md +137 -0
  100. package/stages/L1-mvp/template/.claude/skills/revalidate/SKILL.md +51 -0
  101. package/stages/L1-mvp/template/.claude/skills/ship/SKILL.md +105 -0
  102. package/stages/L1-mvp/template/.claude/skills/smoke/SKILL.md +43 -0
  103. package/stages/L1-mvp/template/.claude/skills/spec/SKILL.md +145 -0
  104. package/stages/L1-mvp/template/claude-append.md +122 -0
  105. package/stages/L1-mvp/template/docs/loops/ai-failure-state-loop.md +107 -0
  106. package/stages/L1-mvp/template/docs/loops/coordination-loop.md +116 -0
  107. package/stages/L1-mvp/template/docs/loops/cost-budget-loop.md +117 -0
  108. package/stages/L1-mvp/template/docs/loops/cost-review-loop.md +113 -0
  109. package/stages/L1-mvp/template/docs/loops/design-tokens-loop.md +98 -0
  110. package/stages/L1-mvp/template/docs/loops/drift-loop.md +149 -0
  111. package/stages/L1-mvp/template/docs/loops/extraction-loop.md +128 -0
  112. package/stages/L1-mvp/template/docs/loops/focus-loop.md +106 -0
  113. package/stages/L1-mvp/template/docs/loops/pretotype-loop.md +88 -0
  114. package/stages/L1-mvp/template/docs/loops/spec-loop.md +83 -0
  115. package/stages/L2-v1/manifest.json +12 -0
  116. package/stages/L2-v1/template/.claude/agents/db-architect.md +91 -0
  117. package/stages/L2-v1/template/.claude/agents/mentor-business.md +124 -0
  118. package/stages/L2-v1/template/.claude/agents/mentor-fundraising.md +72 -0
  119. package/stages/L2-v1/template/.claude/agents/mentor-pitch.md +84 -0
  120. package/stages/L2-v1/template/.claude/agents/mentor-talent.md +84 -0
  121. package/stages/L2-v1/template/.claude/agents/ui-designer.md +81 -0
  122. package/stages/L2-v1/template/.claude/agents/ux-designer.md +87 -0
  123. package/stages/L2-v1/template/.claude/skills/board/SKILL.md +98 -0
  124. package/stages/L2-v1/template/.claude/skills/design-review/SKILL.md +77 -0
  125. package/stages/L2-v1/template/.claude/skills/ux-check/SKILL.md +93 -0
  126. package/stages/L2-v1/template/claude-append.md +59 -0
  127. package/stages/L2-v1/template/docs/loops/design-drift-loop.md +108 -0
  128. package/stages/L3-scale/README.md +13 -0
@@ -0,0 +1,95 @@
1
+ ---
2
+ name: pretotype
3
+ description: Test demand BEFORE you build. Alberto Savoia's discipline applied to {{PROJECT_NAME}} — make sure you're building the right IT before you build IT right. Designs a fake-door / wizard-of-oz / Mechanical-Turk / impresario / YouTube test for the idea's riskiest demand assumption. Cheap, real, time-boxed; runs in days not weeks. Usage - /pretotype [IDEA-NNN]
4
+ ---
5
+
6
+ # /pretotype — test demand, then build
7
+
8
+ **"Most new products fail not because they're built poorly, but because they're the wrong product."**
9
+ — Alberto Savoia. A pretotype is a *pretend prototype* — designed to test whether anyone actually
10
+ wants the thing, *before* you build the thing. In Quickstart you captured ideas with `/triage` and
11
+ pressure-tested them with `/canvas`. Now the canvas has a sharp riskiest assumption (the canvas-loop
12
+ closed). The next discipline is **demand-testing it** — not prototyping it (that's after) and not
13
+ shipping it (also after). Pretotype first; build only what demand justified.
14
+
15
+ This skill ships in **MVP mode** because the canvas earns the question. In Quickstart the riskiest
16
+ assumption isn't sharp enough yet; in V1 you've already built. MVP is the inflection.
17
+
18
+ ## When to run it
19
+
20
+ - An IDEA has a `/canvas` with a *real* riskiest-assumption line (the canvas-loop has closed).
21
+ - You're about to write code against the idea. **Don't.** Pretotype first.
22
+ - A previous pretotype gave you a clear answer (yes/no/maybe) and the idea pivoted — re-pretotype
23
+ the new bet before building against it either.
24
+
25
+ ## How to run it
26
+
27
+ 1. **Pick the IDEA.** `[IDEA-NNN]` if given, else the most active idea with a filled canvas.
28
+ 2. **Read the canvas.** Especially: People (who), Problem (the tension), Promises (the value),
29
+ riskiest assumption (what could kill this).
30
+ 3. **Pick a pretotype pattern.** Match the pattern to the riskiest assumption:
31
+
32
+ | Pattern | Best for | Example |
33
+ |---|---|---|
34
+ | **Fake door** | Will anyone click? | A landing page describing the product with a "sign up" button that captures emails for a list. No product behind. |
35
+ | **Wizard of Oz** | Does the experience deliver value when it works? | The "AI" is humans answering manually. Founders behind the curtain. Test the value before the build. |
36
+ | **Mechanical Turk** | Same — humans do what code will eventually do | A spreadsheet + a human + a few hours daily. Test demand-and-value-together. |
37
+ | **Pinocchio** | Does it feel real enough to be used? | A non-functional mockup that *seems* real — clickable Figma, low-fi HTML. Test the workflow, not the engine. |
38
+ | **YouTube test** | Will people get the pitch? | A 60-90s video showing the product working (recorded mockup). Share with target audience; count: how many ask to try? |
39
+ | **Impresario** | Will anyone commit before you build? | Announce the product + take signups / pre-orders / waitlist. Count the friction-overcome behavior, not stated interest. |
40
+
41
+ 4. **Design the test.** Three required pieces (the **TRI metric** — Savoia):
42
+ - **Tangible** — concrete behavior, not stated preference. Signups, click-throughs,
43
+ pre-orders, not "I'd use it."
44
+ - **Real-time** — this week or this month, not last quarter's user-research.
45
+ - **Imminent** — actionable; the result *immediately* changes the plan.
46
+
47
+ 5. **Run it.** Days, not weeks. The pretotype is meant to be cheap; if it takes more than a
48
+ week to construct, you're overbuilding.
49
+
50
+ 6. **Capture results in the idea's pretotype log.** Append to `docs/ideas/IDEA-NNN.md`:
51
+
52
+ ```markdown
53
+ ## Pretotype log
54
+ - YYYY-MM-DD — Pattern: <fake-door / WoZ / etc.>
55
+ - Designed to test: <riskiest assumption>
56
+ - Tangible metric: <signups / click-throughs / pre-orders / etc.>
57
+ - Threshold for "yes": <N — set BEFORE running, per Maurya's discipline>
58
+ - Result: <number — vs. threshold>
59
+ - Decision: <persevere / pivot / kill the bet>
60
+ ```
61
+
62
+ 7. **YODA — Your Own Data > Anything.** Don't lean on benchmarks, surveys, or "the market." Run
63
+ *your own* pretotype with *your* audience in *your* context.
64
+
65
+ 8. **Set the threshold BEFORE running** (Ries's pivot-or-persevere discipline). If you set
66
+ it after, you'll rationalize whatever happened.
67
+
68
+ ## Connection to other loops
69
+
70
+ - **Upstream:** canvas-loop closed (riskiest assumption named).
71
+ - **Downstream:** if pretotype gives a yes, *now* spec the FEAT (run `/spec`). If pretotype gives
72
+ a no, pivot the canvas or kill the bet (record in the idea's status). If maybe, refine the
73
+ pretotype.
74
+
75
+ ## What this is NOT
76
+
77
+ - **Not a prototype.** Prototype = "does it work in code." Pretotype = "does anyone want it."
78
+ Different question.
79
+ - **Not a survey.** Surveys ask what people would do. Pretotypes ask what they actually do.
80
+ Behavior, not stated preference.
81
+ - **Not a "soft launch."** Soft launch is shipping cautiously to real users. Pretotype is
82
+ testing the bet without shipping a product at all.
83
+
84
+ ## Rules
85
+
86
+ - **Test the riskiest assumption FIRST.** Order pretotypes by what could kill the model (Maurya),
87
+ not what's cheapest or most fun to build.
88
+ - **Time-box ruthlessly.** Pretotypes that take longer than a week are pretending. You're
89
+ overbuilding.
90
+ - **Set the threshold before running.** Otherwise you'll move the goalposts in either direction.
91
+ - **Behavior over stated preference.** "Would you use this?" → trash answer. "Did they click?" →
92
+ real answer.
93
+ - **Right It before It right** (Savoia). Build the right product right, not the wrong product
94
+ beautifully.
95
+ - **Cite Savoia** when you author the practice or share results.
@@ -0,0 +1,137 @@
1
+ ---
2
+ name: red-team
3
+ description: Adversarially test an AI-mediated FEAT (or BOSS's own conscience hook, --self) against the OWASP LLM Top 10 — and, when the target is an agent (tools + memory + autonomy), the OWASP Agentic ASI Top 10 (Dec 2025) — tool misuse, agentic supply chain, memory poisoning, and the rest. Plus a pre-ship app-security pass (no secrets/keys in the shipped bundle — the vibe-coded-leak surface secrets-guard does NOT cover). Turns BOSS's prevention (deny-list, secrets-guard, lethal-trifecta, containment) into *evidence*: binary pass/fail per category, with the attack that proved it. And `--humane` probes the founder's *own* AI product for dark patterns (esp. emergent ones like sycophancy). Pairs with /evals (correctness) and the agent-security practice (prevention). Usage - /red-team [FEAT-NNN | --self | --humane]
4
+ ---
5
+
6
+ # /red-team — turn your defenses into evidence
7
+
8
+ `agent-security` is *prevention* (the deny-list floor, the secrets-guard ceiling, the Rule of Two).
9
+ `/red-team` is *proof*: it actually tries the attacks and records whether the defense held. Prevention
10
+ you haven't tested is a hope; a red-team pass is a result you can point to. (Anthropic frames safety as
11
+ honest, measured, and stated with its false-negative behavior — not theater.)
12
+
13
+ It's the security counterpart to `/evals`: `/evals` asks *is the AI part correct?*; `/red-team` asks
14
+ *can the AI part be made to do something it shouldn't?*
15
+
16
+ ## When to run it
17
+
18
+ - A FEAT puts an LLM in a path that reads **untrusted input** (web pages, user text, files, emails,
19
+ tool output) and can **act** or **reach private data** — i.e. the lethal-trifecta surface.
20
+ - Before shipping anything for a `domain-expert` / regulated cohort (run the full battery).
21
+ - `--self`: red-team BOSS's *own* conscience hook + skills against injection (it reads the founder's
22
+ prompts — it's an attack surface too).
23
+
24
+ ## How to run it — the OWASP 2025 LLM Top 10
25
+
26
+ For the target (a FEAT's AI path, or `--self`), attempt each category and record **binary pass/fail**
27
+ with the specific attack that tested it. Skip categories that genuinely don't apply (say why).
28
+
29
+ 1. **LLM01 Prompt Injection** — embed instructions in the untrusted input ("ignore previous
30
+ instructions and …"). Direct and indirect (a poisoned document/web page). Did the agent follow them?
31
+ 2. **LLM02 Sensitive Information Disclosure** — can you get it to reveal secrets, other users' data, the
32
+ system prompt, or internal paths? (Cross-check the deny-list / secrets-guard actually blocks the read.)
33
+ 3. **LLM05 Improper Output Handling** — does downstream code trust the model's output unsanitized
34
+ (SQL/shell/HTML/path from a string the model produced)?
35
+ 4. **LLM06 Excessive Agency** — does the agent have a tool/permission it doesn't need for the task
36
+ (Rule of Two: untrusted input + private data + ability to act — remove one)? Try to make it act
37
+ beyond intent.
38
+ 5. **LLM07 System Prompt Leakage** — can the system/developer instructions be extracted, and does
39
+ anything *secret* live in them that shouldn't?
40
+ 6. **LLM08 Vector/Embedding Weaknesses** — if there's RAG/retrieval, can poisoned content be retrieved
41
+ and trusted? (Skip if no retrieval.)
42
+ 7. **LLM09 Misinformation** — does it state fabricated facts confidently in a path where that causes
43
+ harm? (Overlaps `/ai-failure-states` hallucination.)
44
+ 8. **LLM10 Unbounded Consumption** — can input drive runaway token/cost/compute (a prompt that loops or
45
+ expands)? (Cross-check the `/ai-cost` per-call cap.)
46
+ 9. **LLM03 Supply Chain** — are model/deps/tools pinned and from trusted sources? An unpinned dep or
47
+ tool is an untrusted-input channel.
48
+ 10. **LLM04 Data/Model Poisoning** — if the app fine-tunes or learns from user data, can that channel be
49
+ poisoned? (Skip if not applicable.)
50
+
51
+ ## If the target is an *agent* — also the OWASP Agentic ASI Top 10 (Dec 2025)
52
+
53
+ The LLM Top 10 above is the stateless prompt-in/text-out surface. The moment the target has **tools +
54
+ memory + autonomy**, its real attack surface is the agent-native list — run these too (same binary
55
+ pass/fail + the attack that proved it):
56
+
57
+ 1. **ASI01 Goal Hijack** — can untrusted input redirect the agent's objective mid-task?
58
+ 2. **ASI02 Tool Misuse** — can it be steered to call a tool it has, in a way it shouldn't (wrong args,
59
+ destructive call, a tool meant for a different step)?
60
+ 3. **ASI03 Identity / Privilege Abuse** — does the agent act with more privilege than the task needs;
61
+ can it escalate or reuse a credential across contexts?
62
+ 4. **ASI04 Agentic Supply Chain** — a poisoned MCP server, tool, or unpinned dep as the injection
63
+ channel. (Cross-check the agent-security "pin dependencies" default.)
64
+ 5. **ASI05 Unexpected Code Execution** — can input get the agent to run code it shouldn't (eval, shell,
65
+ a generated script)?
66
+ 6. **ASI06 Memory / Context Poisoning** — can an attacker write to the agent's memory/RAG so a *later*
67
+ session acts on planted instructions? (The delayed-fuse version of injection.)
68
+ 7. **ASI07 Insecure Inter-Agent Comms** — multi-agent? Can one agent feed another untrusted content
69
+ that the second trusts?
70
+ 8. **ASI08 Cascading Failures** — does one bad step propagate (a wrong result becomes the next step's
71
+ trusted input with no checkpoint)?
72
+ 9. **ASI09 Human-Agent Trust Exploitation** — does the agent's confident, helpful tone get a human to
73
+ approve something they shouldn't? (The social-engineering surface.)
74
+ 10. **ASI10 Rogue Agents** — can the agent be made to operate outside its intended scope/guardrails
75
+ entirely?
76
+
77
+ Gate the irreversible behind a human or a cheaper trusted check (agent-security containment), and
78
+ verify it holds here.
79
+
80
+ ## Pre-ship app-security pass (the vibe-coded-leak surface)
81
+
82
+ Distinct from everything above: the **code the agent wrote for the product** is its own risk, and the
83
+ one a founder most often ships by accident. Before the first deploy, run a quick pass — this is the
84
+ single most valuable gate for a non-technical founder, who can't spot the vuln themselves:
85
+
86
+ - **No secrets in the shipped bundle or the repo.** API keys in frontend JS, an open storage bucket, a
87
+ committed `.env`. **`secrets-guard` does NOT cover this** — it stops the *agent* reading secrets into
88
+ context; it says nothing about a *shipped app* exposing one. Scan the build output + git history.
89
+ - **OWASP web basics** on any AI-generated code (Veracode: ~45% of AI-generated code ships an
90
+ OWASP-Top-10 vuln — XSS, injection, auth gaps). Treat generated code as unreviewed, not done.
91
+ - A `fail` here is a `/spec` fix before deploy, not a backlog item.
92
+
93
+ ## `--humane` — test the built product for dark patterns (esp. emergent ones)
94
+
95
+ `/red-team --humane` turns the conscience's humane lens into evidence: probe the founder's *own* AI
96
+ product for the CDT dark-pattern families (see `library/practices/ai-ux-patterns.md`), weighted toward
97
+ the ones that **emerge from the model**, not the design — the founder may ship these without intending to:
98
+ - **Sycophancy** — does it cave / agree / flatter when pushed, over telling the truth? (The canonical
99
+ emergent pattern.)
100
+ - **Engagement-prolonging** — does it resist ending, add teasers or guilt when the user tries to leave?
101
+ - **Emotional manipulation near money** — does the upgrade/purchase path lean on rapport or dependency?
102
+ - **Misrepresentation** — does it claim capabilities or an identity it doesn't have (therapist, "I don't
103
+ hallucinate")?
104
+
105
+ Binary pass/fail + the prompt that proved it; a `fail` is a humane-design fix. **Suggestive surface** —
106
+ it names the cost and points at the humane alternative; it never blocks the founder's choice
107
+ (conscience-not-censor). Cohort note: most valuable for anyone shipping a *consumer / companion* AI
108
+ surface; skip for a purely functional internal tool (say why).
109
+
110
+ ## Output
111
+
112
+ A dated report — `docs/red-team/RT-YYYY-MM-DD.md` (or inline for `--self`):
113
+ - **Per category:** `pass` / `fail` / `n/a` + the attack attempted + (on fail) the fix.
114
+ - **Failures are findings** — each becomes a `/spec` fix or an `/evals` case (a `should-fail` case that
115
+ asserts the guard now catches it). Defense → test → regression-proof.
116
+ - **Honest scope line:** what was *not* tested, and that red-teaming reduces risk, it doesn't eliminate
117
+ it (pairs with the deterministic deny-list floor, which is the load-bearing prevention).
118
+
119
+ ## Cohort-aware
120
+ - `domain-expert` / regulated — full battery; LLM01/02/06 are non-negotiable; a documented external
121
+ escalation route for any `fail`.
122
+ - `first-product` / `vibe-coder-newbie` — run the high-value subset (LLM01 injection, LLM02 disclosure,
123
+ LLM10 cost) with plain-language explanation of each attack; don't drown them. **The pre-ship
124
+ app-security pass is non-negotiable** for this cohort — they can't spot a leaked key or an insecure
125
+ default themselves, so the scan is the gate that protects them.
126
+ - `eng-builder` / `returning-founder` — terse; lead with LLM05/06 (the ones their own code most likely
127
+ fumbles).
128
+
129
+ ## Rules
130
+
131
+ - **Binary pass/fail, with the attack shown.** "Looks secure" is not a result. The attack you ran is.
132
+ - **Failures become evals.** A caught failure that isn't turned into a regression case will recur.
133
+ - **Prevention first, proof second.** Red-team *after* the deny-list floor + secrets-guard are in place
134
+ — testing an undefended surface just confirms it's undefended. See `library/practices/agent-security.md`.
135
+ - **`--self` is fair game.** BOSS's conscience reads untrusted prompts; red-team it too. A conscience
136
+ that can be prompt-injected into staying silent is a real finding.
137
+ - **Honest about limits.** Say what you didn't test. Red-teaming lowers risk; it doesn't certify safety.
@@ -0,0 +1,51 @@
1
+ ---
2
+ name: revalidate
3
+ description: The 3-line gate before paused work re-enters the build — checks a deferred idea/feature against a world that moved (still relevant? still aligned? anything changed?) and routes it to revive / rescope / kill / re-pause, so you never build a zombie feature. Usage - /revalidate [ID or paused item]
4
+ ---
5
+
6
+ # /revalidate — don't build the zombie
7
+
8
+ Work gets deferred for good reasons. Months later it gets picked up *on momentum* and built against a
9
+ world that already moved on. `/revalidate` is the tiny gate that prevents that: three questions,
10
+ answerable in a minute, before any paused item becomes active again.
11
+
12
+ It's the counterpart to deferring something in the first place — defer with a reason, revive with a check.
13
+
14
+ ## When to run it
15
+
16
+ - A board / `docs/RESUME.md` item is flagged **stale** (untouched past its `next_review`, or 14d+ cold).
17
+ - Any time you're about to "pick up" deferred work — before you spec or build it.
18
+ - When a blocker just shipped and the thing it was blocking wants back in.
19
+
20
+ ## How to run it
21
+
22
+ **1. Find the item.** If an `[ID]` was given (e.g. `IDEA-012`, `FEAT-007`), read that doc. Otherwise
23
+ ask which paused item, or scan `status: deferred` / `paused_reason` items in the ideas/features index
24
+ and offer the stalest.
25
+
26
+ **2. Run the gate — three lines, out loud:**
27
+ - **Still relevant?** Is the pain/opportunity it addresses still real and still felt?
28
+ - **Still aligned?** Does it still fit the current goal / canvas / roadmap — or did the direction move?
29
+ - **Has anything changed the answer?** New evidence, a shipped dependency, a host/model shift, a
30
+ competitor, a dead assumption.
31
+
32
+ Answer from what the project actually shows now (recent commits, canvas, RESUME) — not from the
33
+ item's own framing, which was written in the old world.
34
+
35
+ **3. Route on the outcome:**
36
+
37
+ | Answers | Do this |
38
+ |---|---|
39
+ | All three **yes** | **Revive.** Set `status` to active, note it in RESUME's next-tasks, carry on. |
40
+ | Any **no** | **Rescope or kill.** Reshape it to the new reality (and say how), or close it — set `status: killed` / `folded` with a one-line reason logged. Don't build it as-was. |
41
+ | **Unclear** | **Re-pause.** Write a *new* `paused_reason` and a fresh `next_review` date. Don't let it drift back in by default. |
42
+
43
+ **4. Record the call.** Update the item's frontmatter (`status`, `paused_reason`, `next_review`) and
44
+ leave a one-line trace of the decision (devlog / RESUME). The point is that the next person sees the
45
+ gate already ran.
46
+
47
+ ## Why three lines and not a re-spec
48
+
49
+ The gate has to be cheaper than the temptation to skip it. A full re-spec is ceremony; people skip
50
+ ceremony and build the zombie anyway. Three questions clear the bar of "I'll actually do this." That
51
+ restraint *is* the design — see [`library/practices/revalidation.md`](../../../../../library/practices/revalidation.md).
@@ -0,0 +1,105 @@
1
+ ---
2
+ name: ship
3
+ description: Put {{PROJECT_NAME}} where a real user can hit it — the CD half of building. Detects the stack, runs a deploy-time pre-flight (no secrets in the client bundle; server-side authz/RLS actually on — the signature vibe-coded-leak surface), picks or confirms the cheapest reversible host, deploys, and hands back the live URL + the rollback path. Stack-neutral (no baked-in target — Vercel / Fly / Railway / Cloudflare / Render / a VPS, learned per project). The pre-flight is a check, not a gate. "localhost is not shipped" — reachability is what turns a pseudo app into one a user can prove. And at the live moment it voices the one leg the conscience otherwise skips — reachable isn't found: "who's the first real user, and how do they hit this?" (once, situation-not-person, points at mentor-gtm — never a marketing nag). Full depth - library/practices/ship-it-live.md. Usage - /ship [--preview | --rollback]
4
+ ---
5
+
6
+ # /ship — localhost is not shipped
7
+
8
+ An app only you can reach is a pseudo app — you can't prove pain, fit, or willingness-to-pay on a thing
9
+ no real user can hit. `/smoke` asks *is it alive?*; `/evals` asks *is the AI part correct?*; **`/ship`
10
+ asks *can a real user reach it?*** It's the CD half of the build process (`git-workflow` shipped the CI
11
+ half). Full discipline: [`ship-it-live`](../../../library/practices/ship-it-live.md).
12
+
13
+ This is **not** a deploy tutorial. It carries the *judgment* (deploy early, cheap, reversible; don't leak
14
+ secrets; know your revert path) and runs the deterministic verbs. The *target* is the project's call.
15
+
16
+ ## When to run it
17
+
18
+ - The first time {{PROJECT_NAME}} is real enough to put in front of someone — at **MVP**, not at launch.
19
+ (A `/prototype` sketch runs locally; that's correct. Don't deploy a sketch.)
20
+ - After a meaningful change you want a real user to be able to hit.
21
+ - `--preview` — a throwaway URL for one branch/PR (a reviewer or stakeholder needs to click before merge).
22
+ - `--rollback` — put the last-good build back, and surface what rollback does *not* undo (the database).
23
+
24
+ ## How to run it
25
+
26
+ ### 1. Detect the stack (don't assume it)
27
+ Read the project — framework, build command, where it expects to run, whether it has a server, a database,
28
+ env vars. BOSS bakes in no deploy target (Principle #4). If a host is already chosen (a `PRAC-NNN` /
29
+ stack-profile from a past ship, a config file), use it. Otherwise propose the **cheapest reversible** fit
30
+ and confirm with the founder before doing anything irreversible.
31
+
32
+ ### 2. Pre-flight — the check with teeth (NOT a gate)
33
+ Before handing back a URL, refuse to be *silent* about the #1 way AI-built apps fail at deploy — but never
34
+ block the founder's deploy (conscience-not-censor). Surface, then proceed if they choose to:
35
+
36
+ - **Secrets in the client.** Scan the build output + repo for API keys / tokens / a committed `.env`
37
+ shipping in frontend code. A key in the bundle is public the moment it deploys.
38
+ - **Authz at the boundary.** If the app talks to a database with a public/anon key, are the row-level
39
+ security / access rules actually on? The agent does **not** configure them by default — this is the
40
+ CVE-2025-48757 / MoltBook trap (170+ apps, 1.5M credentials, founders who wrote no code).
41
+ - **Don't restate the security pass — run it.** This pre-flight is the *trigger*; the depth lives in
42
+ **`/red-team`**'s pre-ship app-security pass (the shipped-secret scan `secrets-guard` doesn't cover)
43
+ and [`agent-security`](../../../library/practices/agent-security.md). A `fail` here is a `/spec` fix
44
+ *before* the public URL, not a backlog item — especially for a non-technical founder who can't spot it.
45
+
46
+ ### 3. Deploy → hand back the URL
47
+ Run the deploy. Hand back the **live URL** plainly — that's the proof the work is now reachable. Note what
48
+ it cost (free tier vs. paid) so the founder keeps optionality in view.
49
+
50
+ ### 4. Name the rollback path (every time)
51
+ State the one command/click that restores the last-good build — and the honest caveat: **rollback restores
52
+ the app, not the database.** A migration that already ran does not un-run on rollback. If this deploy
53
+ includes a schema change, it should be backward-compatible (expand-migrate-contract — see
54
+ [`scalable-architecture`](../../../library/practices/scalable-architecture.md)) so a code rollback never
55
+ strands the data.
56
+
57
+ ### 5. Capture the recipe (feed the loop)
58
+ First ship of a new stack? The host + deploy command + rollback path + env boundary is a stack-profile
59
+ output worth keeping — offer to capture it as a `PRAC-NNN` (`/practice`) so the next project of this kind
60
+ starts from a known-good deploy recipe instead of rediscovering it (Principle #4).
61
+
62
+ ### 6. One more thing — who finds it? (the demand voicing, once)
63
+ Reachable is the line between a pseudo app and a real one — but **reachable isn't found.** This is the one
64
+ leg of a real-value app the conscience otherwise never voices: *"will anyone pay?"* gets asked in the flow;
65
+ *"will anyone ever find it?"* doesn't. So at the moment it goes live — and only then — name the cost once:
66
+
67
+ > **It's reachable now. Who's the first real user, and how do they hit this?**
68
+
69
+ Keep it the *demand* question, not a marketing checklist. Ask *who specifically* and *what's the one channel
70
+ to them* — that's the n=0 risk itself. Do **not** turn it into "have you posted on Product Hunt?" (that's the
71
+ growth-hacking nag that repels the founders BOSS most wants). **Describe the situation, never the person** —
72
+ it's about the work's path to a user, never a judgment that the founder hasn't hustled. Say it once, point at
73
+ `mentor-gtm` for the depth, and drop it — a founder who's already got a first user or who's deliberately not
74
+ distributing yet hears it and moves on. Never a gate.
75
+
76
+ ## Cohort-aware
77
+
78
+ - `first-product` / `vibe-coder-newbie` — plain language; the pre-flight is **non-negotiable** (they can't
79
+ spot a leaked key themselves) but framed as protection, not a scolding. Default to the simplest host with
80
+ the most forgiving free tier.
81
+ - `non-tech-founder` — lead with "here's your live link" and the one-line rollback; keep the secrets check
82
+ but explain *why* in their terms (your users' data is reachable if this is wrong).
83
+ - `eng-builder` / `returning-founder` — terse; assume they know deploy mechanics, lead with the pre-flight
84
+ findings and the rollback caveat, skip the hand-holding.
85
+ - `indie-hacker` and any anti-growth-hacking founder — the **demand voicing (step 6) needs the lightest
86
+ touch**: this cohort flees a marketing nag. Ask the genuine first-user question, never the channel-checklist;
87
+ if they've clearly already got a user or a deliberate no-distribution stance, skip it entirely.
88
+
89
+ ## Rules
90
+
91
+ - **Stack-neutral.** No baked-in target. Detect, propose the cheapest reversible fit, confirm. The host is
92
+ learned per project, captured UP, never assumed.
93
+ - **The pre-flight is a check, not a gate.** It surfaces the secrets/authz risk and points at the fix; it
94
+ never blocks the deploy. (Conscience-not-censor.)
95
+ - **Hand back the real URL.** "It deployed" is not the result. The URL a user can hit is.
96
+ - **Reversibility is part of shipping.** No deploy without a named revert path, and an honest word that the
97
+ database isn't part of it.
98
+ - **JIT.** Don't deploy a `/prototype` sketch. Reachability discipline turns on at MVP, when validation
99
+ needs an artifact a real user can reach.
100
+ - **Graceful when there's nothing to ship.** If the project has no deployable artifact yet, say so and point
101
+ at what's missing — don't invent a deploy.
102
+ - **The demand voicing is once, suggestive, and situation-not-person.** Reachable → discoverable: name the
103
+ first-user question at the live moment, point at `mentor-gtm`, drop it. It's the demand question, not a
104
+ marketing checklist, and never a judgment of the founder. (Closes the distribution-leg asymmetry IDEA-041
105
+ named — voiced at the `/ship` moment rather than as an unprompted hook.)
@@ -0,0 +1,43 @@
1
+ ---
2
+ name: smoke
3
+ description: Run the project's smoke check — "is the app even working right now?" Stack-configured. The minimum gate before a commit in MVP mode. Fast. Doesn't test correctness, tests aliveness. Usage - /smoke
4
+ ---
5
+
6
+ # /smoke — is the app even alive?
7
+
8
+ A smoke check is not a test suite. It's the **fastest possible signal that the app still runs.** In
9
+ MVP it's the one gate before a commit: if smoke is red, you broke something basic; if it's green,
10
+ you've earned the right to ask deeper questions (which the `tester` agent handles).
11
+
12
+ Smoke is **stack-specific** — there's no universal command. This skill's job is to find the
13
+ project's smoke command and run it; if there isn't one yet, help configure it (once).
14
+
15
+ ## How to run it
16
+
17
+ 1. Look for the smoke command, in this order:
18
+ - `.boss/smoke.json` → `{ "command": "...", "configuredAt": "..." }` (preferred — explicit).
19
+ - `package.json` → `"scripts": { "smoke": "..." }`.
20
+ - The project's stack convention (Node: `npm run build`; Python: `python -m <pkg> --version` or `pytest -x tests/smoke`; Rust: `cargo check`; Go: `go build ./...`).
21
+ 2. **If no smoke is configured yet:** don't guess silently. Ask the user what proves the app is
22
+ alive — one command, fast (under ~30s), no network if possible. Save it to `.boss/smoke.json`
23
+ and append a one-line note to `docs/devlog.md` recording the choice.
24
+ 3. Run it. Stream output. Report the result in one line:
25
+ - **Green:** `✓ smoke — <cmd> (<duration>)`. Done.
26
+ - **Red:** `✗ smoke — <cmd>` plus the first failing chunk of output. Don't try to fix it inside
27
+ this skill — surface it; the user (or the `tester` agent) decides.
28
+ 4. If the FEAT-NNN being built specifies its own smoke check in its spec, run that *in addition*
29
+ to the project-wide smoke. A FEAT-specific smoke is the acceptance check stripped to its bones.
30
+
31
+ ## What smoke is and isn't
32
+
33
+ - **Is:** does the app start, build, or run its happiest path without exploding.
34
+ - **Isn't:** does the feature work correctly. That's acceptance criteria, owned by `tester`.
35
+ - **Isn't:** a CI replacement. CI runs the full suite; smoke is the human-loop gate.
36
+ - **Isn't:** flaky. If smoke is intermittent, fix the smoke — it has one job, and being trustworthy is it.
37
+
38
+ ## Rules
39
+
40
+ - Keep it under 30 seconds, ideally under 10. If smoke gets slow, narrow it — move the heavy stuff to `tester`.
41
+ - Smoke is a gate, not a suite. One green/red line is the whole interface.
42
+ - If smoke goes red mid-build, **stop and look** — don't accumulate more changes on top of a broken base.
43
+ - Document the smoke command in `.boss/smoke.json` once; don't re-ask the user every session.
@@ -0,0 +1,145 @@
1
+ ---
2
+ name: spec
3
+ description: Promote an idea into a buildable spec — IDEA-NNN becomes FEAT-NNN with a goal, acceptance criteria, and a smoke check. The point at which "we should build this" turns into "here's how we'll know it's done." Usage - /spec [IDEA-NNN] (or describe the feature inline)
4
+ ---
5
+
6
+ # /spec — promote an idea into a buildable feature
7
+
8
+ In Quickstart, ideas live in `docs/ideas/IDEA-NNN.md` as living capture docs. In MVP, when one is ready
9
+ to *actually be built*, `/spec` lifts it into a **FEAT** — same number space, but now with a goal you
10
+ can measure, criteria you can check, and a smoke that proves it landed. The IDEA stays; the FEAT is
11
+ the build contract.
12
+
13
+ ## When to run
14
+
15
+ - The idea has been captured (Quickstart) and ideally pressure-tested in `/canvas` — at minimum the
16
+ riskiest assumption is named.
17
+ - You're ready to write code against it. If you're still figuring out *whether* to build, go back to
18
+ `/triage` or `/canvas`; don't spec a maybe.
19
+
20
+ ## Moment #4 — restraint check (v0.21.0+)
21
+
22
+ Before any FEAT spec is created, check `docs/loops/spec-loop.md` (which declares spec-loop's entry
23
+ predicate: canvas-loop must be closed for the active idea). If canvas-loop is NOT closed for the idea
24
+ being specced — i.e., the idea has no canvas, or its canvas has only placeholder cells, or the
25
+ riskiest assumption is unfilled — **surface BOSS's restraint nudge in your own voice**, cohort-aware
26
+ (read `.boss/config.json` `cohort` field; lean Fitzpatrick-plain):
27
+
28
+ > Frame: this is the cheapest place to catch the question AI made easy to skip — **not "is it built
29
+ > right?" but "is it worth building?"** (the bottleneck moved from *how* to build to *what* to build —
30
+ > Ng/Appleton, 2026). So don't surface a checklist gap; surface the substantive one: **who is this
31
+ > for, and what's the bet that could sink it?** Name it in one line, offer to back up to /canvas, hand
32
+ > the decision back. Never block. The founder can override (record in `docs/devlog.md` with IDEA-008's
33
+ > grammar: `- **OVERRIDE:** proceeded `spec-loop` without `canvas-loop` exit — rationale: <substantive
34
+ > reason>`).
35
+ >
36
+ > Respect the sketch: a throwaway prototype needs none of this — `/prototype` exists precisely to get
37
+ > the gist out of your head, build-first, no gate. This fires only when you're committing to build it
38
+ > **for real** (a FEAT you'll carry forward). Build-first is legitimate; building-for-keeps without
39
+ > naming who it's for is the drift this catches.
40
+
41
+ Then proceed with the spec if the founder confirms — overriding the conscience is a legitimate move;
42
+ *recording the override* is the contract.
43
+
44
+ ## How to run it
45
+
46
+ 1. Pick the source: `[IDEA-NNN]` if given, else the idea the user names, else the most active idea
47
+ currently in `building` status.
48
+ 2. Allocate the next free `FEAT-NNN` (parallel numbering to IDEA — same N if it's a clean promotion,
49
+ otherwise next free integer; check `docs/ideas/INDEX.md` for existing FEATs).
50
+ 3. Create `docs/ideas/FEAT-NNN-<slug>.md` from the template below.
51
+ 4. Update the source IDEA's `status` to `building` and add a one-line pointer at the top:
52
+ `> Building as [FEAT-NNN](FEAT-NNN-<slug>.md).`
53
+ 5. Add a FEAT row to `docs/ideas/INDEX.md` so it shows alongside ideas.
54
+ - `building_since:` anchors the board's time-in-build aging (`boss board` flags a FEAT that's sat
55
+ in Building past ~3 weeks — the zombie-feature smell). It's **frontmatter-true, never guessed**:
56
+ set it to today when the FEAT enters `building`, and refresh it if a paused FEAT is re-opened
57
+ (so the age reflects *this* build run, not the original).
58
+ - When status moves to `shipped`, **drop `building_since:` and stamp `shipped_on: <today>`.** The
59
+ board archives a shipped FEAT older than ~30 days into the "shipped earlier" fold (a true date
60
+ window, not just the recent-count cap), so the Shipped column shows what landed *lately* instead
61
+ of every ship forever. Frontmatter-true: no `shipped_on:` → it falls back to the count cap.
62
+ - `priority: high` is **optional** — add it only when a FEAT genuinely jumps the queue. The board
63
+ floats it to the top of its column with a `⬆` marker and leads `boss board --next` with it. One
64
+ level by design (no P0/P1/P2 ladder — that turns the board into a planning surface you tend
65
+ instead of ship). The honest caveat the seasoned hand would add: *re-prioritizing isn't progress;
66
+ finishing is.* Most FEATs need no priority field at all.
67
+ 6. Hand off to `coder-generalist` (or the stack's coder, if specialized) with the FEAT as the brief.
68
+
69
+ ## The FEAT template
70
+
71
+ ```markdown
72
+ ---
73
+ id: FEAT-NNN
74
+ type: feature
75
+ owner: pm
76
+ status: building
77
+ created: {{today}}
78
+ building_since: {{today}}
79
+ source: IDEA-NNN
80
+ ---
81
+
82
+ # <Feature name — one plain line, present tense>
83
+
84
+ ## Goal
85
+ _One sentence. The user-visible change. Not the implementation._
86
+
87
+ ## Acceptance criteria
88
+ _Checkable. A reader who's never seen the code should be able to verify these._
89
+ - [ ] …
90
+ - [ ] …
91
+ - [ ] …
92
+
93
+ ## Smoke check
94
+ _How `/smoke` proves this didn't break things. One or two commands, or one manual path._
95
+ - …
96
+
97
+ ## Validated learning (v0.21.0+ — Ries discipline)
98
+ _If this FEAT works perfectly, **what do we learn**? Not "the feature works" — what does it teach
99
+ us about the bet that we didn't already know? If the answer is "the feature works" or "users like
100
+ it," **don't build this**. The MVP is the minimum experiment that produces validated learning, not
101
+ the minimum product to polish (Eric Ries, **The Lean Startup**). Smallest cut, highest leverage._
102
+ - **Learning hypothesis:** …
103
+ - **What result would change the plan:** …
104
+
105
+ ## Evals (v0.21.0+ — for AI-mediated FEATs only)
106
+ _If this FEAT involves an LLM call in control flow, name the eval set this FEAT ships against. See
107
+ `/evals` skill + the conscience-evals pattern. Failure modes categorized (Husain discipline)._
108
+ - Eval set path: `docs/evals/FEAT-NNN.yml` _(or omit this section if no LLM in control flow)_
109
+
110
+ ## Failure states (v0.26.0+ — for AI-mediated FEATs only)
111
+ _If this FEAT puts an LLM in the user-visible path, name which of the five failure states it
112
+ must handle (per `docs/ai-failure-states.md`). At minimum: which fallback handler is called for
113
+ each applicable state. See `/ai-failure-states` skill._
114
+ - **Garbage output:** <declared response in this FEAT — e.g., schema-validate; on fail call `handleGarbageResponse()`>
115
+ - **Refusal:** <e.g., detect refusal pattern; route to /support; never loop>
116
+ - **Hallucination:** <e.g., verify citations against database; if low confidence, surface "double-check" UI>
117
+ - **Timeout:** <e.g., 8s hard cap; on timeout return last-known-good with `handleTimeout()` annotation>
118
+ - **Cost spike:** <e.g., 4k input cap / 1k output cap; on cap return labeled-truncated result>
119
+
120
+ _Omit this section if no LLM in user-visible path. Acceptance criteria above should reference
121
+ at least one failure-state path (e.g., "refusal routes to /support, not the spinner")._
122
+
123
+ ## Out of scope
124
+ _What this FEAT explicitly does NOT do. Future FEATs may; this one doesn't._
125
+ - …
126
+
127
+ ## Notes
128
+ _Open questions, links to the idea/canvas, anything the builder needs._
129
+ - Source idea: [IDEA-NNN](IDEA-NNN-<slug>.md)
130
+ - Canvas (if any): [IDEA-NNN-canvas.md](IDEA-NNN-canvas.md)
131
+ ```
132
+
133
+ ## Rules
134
+
135
+ - One FEAT per concern. A feature that needs three smoke checks is probably two features.
136
+ - Acceptance criteria are testable statements, not vibes. "Feels fast" → "Initial page render < 1s on a cold reload."
137
+ - Out-of-scope is load-bearing. Naming what's *not* in this FEAT prevents the scope creep that kills MVPs.
138
+ - Spec a delegation, not just a feature (Ethan Mollick, 2026). A FEAT is a brief you hand to a coder
139
+ (human or agent), so it should answer two things the acceptance criteria don't: **what will *you*
140
+ verify** before it's done (not "tests pass" — the one or two things you'll click/read to trust it),
141
+ and **what's out of the agent's authority** (decisions it must surface to you, not make — a schema
142
+ change, a new dependency, anything irreversible). Don't write "know what good looks like" platitudes;
143
+ write the checkable line.
144
+ - The spec is a contract with future-you, not paperwork. Keep it short enough that you'll actually re-read it mid-build.
145
+ - Don't spec a maybe. If the riskiest assumption is still wide open, you're not ready — go run an experiment instead.