npm - bossbuild - Versions diffs - 0.97.0 - Mend

bossbuild 0.97.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (128) hide show

package/LICENSE +21 -0
package/PRINCIPLES.md +70 -0
package/README.md +213 -0
package/VERSION +1 -0
package/bin/boss +3 -0
package/library/README.md +19 -0
package/library/agents/.gitkeep +0 -0
package/library/agents/mentor-venture.md +57 -0
package/library/hooks/.gitkeep +0 -0
package/library/hooks/auto-log.js +133 -0
package/library/hooks/memory-cue.js +82 -0
package/library/hooks/secrets-guard.js +87 -0
package/library/memory-seed/README.md +29 -0
package/library/memory-seed/durable-facts-example.md +16 -0
package/library/practices/.gitkeep +0 -0
package/library/practices/agent-security.md +111 -0
package/library/practices/ai-adoption-culture.md +104 -0
package/library/practices/ai-ux-patterns.md +246 -0
package/library/practices/celebration-of-done.md +100 -0
package/library/practices/conscience-voicing.md +121 -0
package/library/practices/context-discipline.md +116 -0
package/library/practices/design-system.md +152 -0
package/library/practices/git-workflow.md +119 -0
package/library/practices/harm-taxonomy.md +45 -0
package/library/practices/quality-ratchet.md +48 -0
package/library/practices/revalidation.md +57 -0
package/library/practices/scalable-architecture.md +111 -0
package/library/practices/ship-it-live.md +149 -0
package/library/practices/skill-authoring.md +70 -0
package/library/skills/.gitkeep +0 -0
package/library/skills/boss-learn/SKILL.md +63 -0
package/library/skills/boss-sync/SKILL.md +48 -0
package/package.json +49 -0
package/registry/CHANGELOG.md +2737 -0
package/src/board.js +655 -0
package/src/brain.js +288 -0
package/src/cli.js +542 -0
package/src/conscience.js +426 -0
package/src/insights.js +147 -0
package/src/learn.js +92 -0
package/src/map.js +103 -0
package/src/modes.js +82 -0
package/src/paths.js +36 -0
package/src/registry.js +34 -0
package/src/scaffold.js +138 -0
package/src/sync.js +292 -0
package/src/team.js +103 -0
package/stages/L0-quickstart/manifest.json +12 -0
package/stages/L0-quickstart/template/.claude/agents/coder-generalist.md +31 -0
package/stages/L0-quickstart/template/.claude/agents/mentor-venture.md +57 -0
package/stages/L0-quickstart/template/.claude/agents/pm.md +28 -0
package/stages/L0-quickstart/template/.claude/hooks/conscience.js +89 -0
package/stages/L0-quickstart/template/.claude/hooks/lib/loop-runtime.js +507 -0
package/stages/L0-quickstart/template/.claude/hooks/lib/yaml.js +163 -0
package/stages/L0-quickstart/template/.claude/hooks/memory-cue.js +82 -0
package/stages/L0-quickstart/template/.claude/hooks/secrets-guard.js +87 -0
package/stages/L0-quickstart/template/.claude/rules/your-app-code.md +17 -0
package/stages/L0-quickstart/template/.claude/settings.json +36 -0
package/stages/L0-quickstart/template/.claude/skills/boss/SKILL.md +161 -0
package/stages/L0-quickstart/template/.claude/skills/boss-learn/SKILL.md +63 -0
package/stages/L0-quickstart/template/.claude/skills/boss-sync/SKILL.md +55 -0
package/stages/L0-quickstart/template/.claude/skills/canvas/SKILL.md +112 -0
package/stages/L0-quickstart/template/.claude/skills/comprehend/SKILL.md +72 -0
package/stages/L0-quickstart/template/.claude/skills/decide/SKILL.md +122 -0
package/stages/L0-quickstart/template/.claude/skills/feedback/SKILL.md +68 -0
package/stages/L0-quickstart/template/.claude/skills/import/SKILL.md +73 -0
package/stages/L0-quickstart/template/.claude/skills/persona/SKILL.md +92 -0
package/stages/L0-quickstart/template/.claude/skills/prototype/SKILL.md +114 -0
package/stages/L0-quickstart/template/.claude/skills/triage/SKILL.md +104 -0
package/stages/L0-quickstart/template/.claude/skills/welcome/SKILL.md +262 -0
package/stages/L0-quickstart/template/AGENTS.md +31 -0
package/stages/L0-quickstart/template/CLAUDE.md +57 -0
package/stages/L0-quickstart/template/docs/IDS.md +42 -0
package/stages/L0-quickstart/template/docs/ideas/INDEX.md +24 -0
package/stages/L0-quickstart/template/docs/loops/canvas-loop.md +90 -0
package/stages/L0-quickstart/template/docs/loops/capture-loop.md +64 -0
package/stages/L1-mvp/manifest.json +12 -0
package/stages/L1-mvp/template/.claude/agents/mentor-architect.md +124 -0
package/stages/L1-mvp/template/.claude/agents/mentor-cofounder.md +85 -0
package/stages/L1-mvp/template/.claude/agents/mentor-gtm.md +49 -0
package/stages/L1-mvp/template/.claude/agents/program-manager.md +46 -0
package/stages/L1-mvp/template/.claude/agents/tester.md +42 -0
package/stages/L1-mvp/template/.claude/hooks/auto-log.js +133 -0
package/stages/L1-mvp/template/.claude/rules/feature-context.md +18 -0
package/stages/L1-mvp/template/.claude/skills/ai-cost/SKILL.md +249 -0
package/stages/L1-mvp/template/.claude/skills/ai-failure-states/SKILL.md +226 -0
package/stages/L1-mvp/template/.claude/skills/ai-first-init/SKILL.md +227 -0
package/stages/L1-mvp/template/.claude/skills/close/SKILL.md +170 -0
package/stages/L1-mvp/template/.claude/skills/consult/SKILL.md +72 -0
package/stages/L1-mvp/template/.claude/skills/cost-review/SKILL.md +204 -0
package/stages/L1-mvp/template/.claude/skills/design-tokens-init/SKILL.md +192 -0
package/stages/L1-mvp/template/.claude/skills/drift-deep/SKILL.md +170 -0
package/stages/L1-mvp/template/.claude/skills/evals/SKILL.md +154 -0
package/stages/L1-mvp/template/.claude/skills/extract/SKILL.md +209 -0
package/stages/L1-mvp/template/.claude/skills/judge-traces/SKILL.md +68 -0
package/stages/L1-mvp/template/.claude/skills/log/SKILL.md +64 -0
package/stages/L1-mvp/template/.claude/skills/practice/SKILL.md +92 -0
package/stages/L1-mvp/template/.claude/skills/pretotype/SKILL.md +95 -0
package/stages/L1-mvp/template/.claude/skills/red-team/SKILL.md +137 -0
package/stages/L1-mvp/template/.claude/skills/revalidate/SKILL.md +51 -0
package/stages/L1-mvp/template/.claude/skills/ship/SKILL.md +105 -0
package/stages/L1-mvp/template/.claude/skills/smoke/SKILL.md +43 -0
package/stages/L1-mvp/template/.claude/skills/spec/SKILL.md +145 -0
package/stages/L1-mvp/template/claude-append.md +122 -0
package/stages/L1-mvp/template/docs/loops/ai-failure-state-loop.md +107 -0
package/stages/L1-mvp/template/docs/loops/coordination-loop.md +116 -0
package/stages/L1-mvp/template/docs/loops/cost-budget-loop.md +117 -0
package/stages/L1-mvp/template/docs/loops/cost-review-loop.md +113 -0
package/stages/L1-mvp/template/docs/loops/design-tokens-loop.md +98 -0
package/stages/L1-mvp/template/docs/loops/drift-loop.md +149 -0
package/stages/L1-mvp/template/docs/loops/extraction-loop.md +128 -0
package/stages/L1-mvp/template/docs/loops/focus-loop.md +106 -0
package/stages/L1-mvp/template/docs/loops/pretotype-loop.md +88 -0
package/stages/L1-mvp/template/docs/loops/spec-loop.md +83 -0
package/stages/L2-v1/manifest.json +12 -0
package/stages/L2-v1/template/.claude/agents/db-architect.md +91 -0
package/stages/L2-v1/template/.claude/agents/mentor-business.md +124 -0
package/stages/L2-v1/template/.claude/agents/mentor-fundraising.md +72 -0
package/stages/L2-v1/template/.claude/agents/mentor-pitch.md +84 -0
package/stages/L2-v1/template/.claude/agents/mentor-talent.md +84 -0
package/stages/L2-v1/template/.claude/agents/ui-designer.md +81 -0
package/stages/L2-v1/template/.claude/agents/ux-designer.md +87 -0
package/stages/L2-v1/template/.claude/skills/board/SKILL.md +98 -0
package/stages/L2-v1/template/.claude/skills/design-review/SKILL.md +77 -0
package/stages/L2-v1/template/.claude/skills/ux-check/SKILL.md +93 -0
package/stages/L2-v1/template/claude-append.md +59 -0
package/stages/L2-v1/template/docs/loops/design-drift-loop.md +108 -0
package/stages/L3-scale/README.md +13 -0

package/library/practices/harm-taxonomy.md ADDED Viewed

@@ -0,0 +1,45 @@
+---
+id: PRACTICE-harm-taxonomy
+type: practice
+owner: mentor-humane
+status: active
+host: stack-neutral
+provenance: distilled via /vet RVW-045 from Anthropic's Unified Harm Framework (5 dimensions) + Ada Lovelace Institute's advanced-AI-assistant harms (4 relationship-harms). Re-homed from a BOSS-local agent edit into a shippable practice per the mentor-architect boundary verdict (2026-06-20) — the humane lens is cross-cutting, so it belongs in a practice every mentor + the conscience can cite, not inside one agent. BOSS v0.84.0.
+---
+# Practice — Harm taxonomy (name the axis, don't improvise)
+> **Where this sits.** The humane lens (Principle #6) is *cross-cutting* — it belongs inside every
+> mentor, the conscience, and the canvas's Risks & Harms cell, not behind one agent's door. This is the
+> shared vocabulary they all reason against, so "who could this harm?" gets a checklist instead of a
+> vibe. Its product-design twin is the dark-pattern checklist in [`ai-ux-patterns.md`](ai-ux-patterns.md).
+"Who could this harm?" is sharper reasoned against *named axes*. Two complementary lenses — run a choice
+past both before you say "no harm":
+## Five harm dimensions (Anthropic, Unified Harm Framework)
+**physical · psychological · economic · societal · individual-autonomy** — each weighted by
+*likelihood × scale*. "Individual autonomy" maps straight onto BOSS's attention/agency/dignity language.
+## Four relationship-harms of personalized AI (Ada Lovelace)
+**manipulation · emotional dependence · anthropomorphism · overreliance** — the diffuse,
+relationship-level harms conventional product-safety thinking misses, and the ones most live for a
+founder building a companion / affective AI product.
+## How to use it
+- **mentor-venture / `/canvas`:** at the Risks & Harms cell, walk both lists — name the *worst-served*
+  person, not an abstraction.
+- **The conscience:** when a moment fires on a harm, name *which axis* it's on (specific beats "this
+  seems bad").
+- **The founder's product:** pair with `ai-ux-patterns.md`'s dark-patterns + `/red-team --humane`.
+- **Reflexively — BOSS's own voice:** the anthropomorphism / overreliance axes discipline BOSS too. The "seasoned
+  hand who doesn't need the credit" resists para-social pull, performed warmth, and being leaned on as an
+  oracle.
+## Altitude / JIT
+Not a wall on a Quickstart. The conscience surfaces the relevant axis JIT; the full taxonomy is the
+reasoning *behind* a humane nudge, not a checklist shoved at a day-one founder (Principle #2).

package/library/practices/quality-ratchet.md ADDED Viewed

@@ -0,0 +1,48 @@
+---
+id: PRACTICE-quality-ratchet
+type: practice
+owner: pm
+status: active
+host: stack-neutral
+provenance: ported UP from the dhun dogfood (.ratchet/ + /code-health gate) via the 2026-06-20 method scan — BOSS v0.48.0
+---
+# Practice — The quality ratchet (a baseline that only moves the right way)
+> **The problem this kills: silent backsliding.** A codebase accumulates the thing you're trying to
+> reduce — `unwrap()`s, `any`s, TODOs, untyped boundaries, skipped tests, bundle bytes — one
+> reasonable-looking change at a time. No single diff looks bad; the trend is the damage. A ratchet
+> turns the trend into a gate: pick one number, write it down, and let it move only in the direction
+> you want.
+## The mechanism (deliberately minimal)
+1. **Pick one metric** that proxies the quality you care about and can be counted mechanically
+   (a grep count, a coverage %, a bundle size, a lint-violation total).
+2. **Baseline it.** Write the current number to one file (e.g. `.ratchet/<metric>.txt`). One number,
+   no ceremony.
+3. **Gate regressions.** A check (CI, a hook, a `/smoke` step) recomputes the number; if it moved the
+   wrong way, the gate trips. Improving the number updates the baseline (the ratchet clicks tighter).
+4. **Log the why, elsewhere.** When the baseline moves — especially if an override let it loosen — a
+   one-line note in a devlog/CHANGELOG records *why*. The ratchet file stays a bare number; the
+   reasoning lives where reasoning lives.
+## Why one number, one direction
+The power is in the constraint. A dashboard of twelve metrics gets ignored; one number with a hard
+direction is a decision you can't accidentally skip. The ratchet doesn't ask you to fix everything —
+it asks you to **never make it worse**, which is the achievable version of quality discipline on a
+fast-moving build. (Principle #1: extract the reusable discipline, not a one-off cleanup.)
+## When to reach for it vs. when not to
+- **Reach for it** when something you care about degrades gradually and reversibly, and a single
+  countable proxy exists (error-handling debt, type coverage, test count, a11y violations).
+- **Don't** ratchet a number that can't be made gaming-proof or that punishes legitimate growth (raw LOC,
+  file count). A ratchet on the wrong metric just adds friction. And don't add the gate before the
+  metric has earned it — premature ceremony (Principle #2).
+## Relationship to BOSS
+A CLI `boss ratchet` is a *possible* DOWN later — but only once a metric earns it. For now this is the
+named pattern a founder (or BOSS's own repo) can apply by hand or wire into `/smoke`. See `IDEA-027`.

package/library/practices/revalidation.md ADDED Viewed

@@ -0,0 +1,57 @@
+---
+id: PRACTICE-revalidation
+type: practice
+owner: pm
+status: active
+host: stack-neutral
+provenance: ported UP from the dhun dogfood (docs/workflows/lifecycles/REVALIDATION.md) via the 2026-06-20 method scan — BOSS v0.48.0
+---
+# Practice — Revalidation (the 3-line gate before paused work re-enters build)
+> **The problem this kills: zombie features.** Work gets deferred for a good reason — a blocker, a
+> capacity call, a "not yet." Months later it gets picked back up *on momentum*, never re-checked
+> against a world that moved. The result is building something the project no longer needs. The fix
+> is a deliberately tiny gate: three questions, answerable in a minute, before any paused item
+> re-enters the build.
+## The gate
+Before a `deferred`/`paused` idea or feature becomes active again, its owner answers three lines:
+1. **Still relevant?** — Is the pain/opportunity it addresses still real and still felt?
+2. **Still aligned?** — Does it still fit the current goal / canvas / roadmap, or did the direction move?
+3. **Has anything changed the answer?** — New evidence, a shipped dependency, a host/model shift, a
+   competitor, a dead assumption?
+## The outcome matrix
+| Answers | Outcome |
+|---|---|
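+| All three clear — still relevant, still aligned, nothing has changed the answer | **Revive.** Move to active; carry on. |
+| Any one fails — no longer relevant, no longer aligned, or something has changed the answer | **Rescope or kill.** Don't build it as-was. Either reshape it to the new reality or close it with a reason logged. |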
+| **Unclear** | **Re-pause** with a *new* `paused_reason` and a `next_review` date. Don't let it drift back in by default. |
+## Why it's three lines, not a re-spec
+The gate has to be *cheaper than the temptation to skip it*. A full re-spec is ceremony; people skip
+ceremony and build the zombie. Three questions clear the bar of "I'll actually do this" — that's the
+whole design. (Principle #2: just-in-time, never premature ceremony.)
+## When it fires
+- A board / RESUME item flagged **stale** (no movement past its `next_review`, or 14d+ untouched).
+- Any time someone says "let's pick up X" about deferred work.
+- Default review intervals when an item is paused: *blocked* → when the blocker ships + 1 week;
+  *capacity* → +90 days. Set `next_review` so the gate has a trigger.
+## Frontmatter hooks (what makes it automatable)
+The gate runs off metadata the doc already carries: `status: deferred`, `paused_at`, `paused_reason`,
+`next_review`. A board can surface "time to revalidate" the moment `next_review` passes — the practice
+and the metadata are two halves of one mechanism.
+## BOSS's own dogfood
+BOSS's `docs/RESUME.md` carries a long deferred list and a backlog catalog (IDEA-012). This practice
+is what `/revalidate` runs against them — BOSS eats it first. See `IDEA-027`.

package/library/practices/scalable-architecture.md ADDED Viewed

@@ -0,0 +1,111 @@
+---
+id: PRACTICE-scalable-architecture
+type: practice
+owner: mentor-architect
+status: active
+host: stack-neutral
+provenance: distilled from the 2026-06-20 founding-teams research (RESEARCH-COMPENDIUM-2026-06-20 Part B7 — technical scaffolding that survives the climb) — Fowler MonolithFirst, Shopify's 2.8M-line modular monolith [EVIDENCE], Bezos one-way/two-way doors, Factory.ai "documented conventions rot; enforced conventions compound", Notion's eslint ratchet — BOSS v0.89.0, FEAT-023 thread 2
+---
+# Practice — Architecture that survives the climb (defer the tax, enforce the conventions)
+> **The spine.** The value is never the rule — it's the **enforcement loop**. A convention an agent (or
+> the fiftieth hire) can't enforce against itself rots within a week. AI sharpens this to a point: an agent
+> re-derives the codebase's conventions from scratch every session, so anything you only *wrote down*
+> drifts, and anything you *encoded as a check* compounds. Factory.ai's line is the cleanest statement of
+> it: **"documented conventions rot; enforced conventions compound."** This practice is two moves —
+> **defer the architecture costs you can defer**, and **encode the conventions you can't afford to lose** —
+> so a fast AI build doesn't dig a hole that's expensive to climb out of.
+## Modular-monolith-first, extract when forced
+Start with **one deployable**. Resist the pull to split into services before a real seam demands it
+(Fowler's MonolithFirst; Shopify runs a **2.8M-line modular monolith** [EVIDENCE] — "microservice envy"
+is on the *Hold* ring of the ThoughtWorks radar for a reason). The distributed-systems tax — network
+calls, partial failure, deploy orchestration, eventual consistency — is real and you pay it forever once
+you take it on. At n=1–5 with an AI writing most of the code, that tax buys you nothing and costs you
+velocity.
+- **The *modular* half is the part that's load-bearing.** A monolith isn't a license for a mud-ball.
+  Keep clear module boundaries *inside* the single deployable — each feature/domain owns its code, talks
+  to others through a named interface, doesn't reach into another's internals. That's what makes the
+  eventual extraction *cheap when forced*: you cut along a seam that already exists, instead of
+  untangling one that doesn't. A `FEAT` is a natural module.
+- **"Extract when forced"** means the trigger is a real constraint — a scaling wall, a team that needs
+  independent deploys, a compliance boundary — not an aesthetic preference or a blog post. Until then,
+  the monolith is the right answer, not the embarrassing one.
+## Spend the rigor on the one-way doors — and the schema is the one
+Most architecture decisions are two-way doors (Bezos): reversible, so decide fast and cheap. **One is
+not — the database schema.** Data outlives code; a bad migration on live data is the change you can't
+take back. So that's where the ceremony goes, from the *first table*:
+- **Migrations-as-code from day one.** Every schema change is a versioned, reviewed, replayable migration
+  file in the repo — never a hand-run `ALTER TABLE` on the database. This is the single thing that's
+  genuinely expensive to retrofit (evodb / evolutionary-database discipline), so it's the one piece of
+  upfront ceremony that pays for itself immediately.
+- **The migration log *is* a guardrail against AI schema drift.** An agent generating schema changes
+  across sessions will happily diverge — add a column here, rename one there, no record of why. The
+  migrations directory turns that into a reviewable, ordered, ownable history: the same artifact that
+  lets you replay the schema also lets a human (and the next agent) see what changed and veto what
+  shouldn't have. Wire schema changes into the **high-risk review tier** (see
+  [`git-workflow.md`](git-workflow.md)).
+## Conventions as code (enforced, not remembered)
+A convention only counts if a machine enforces it — otherwise it's a suggestion an agent forgets.
+- **Formatting is law, not taste.** Adopt a formatter and let it end the style debate permanently
+  (Prettier, or **Biome** — its single binary is genuinely agent-friendly for a new repo: one tool, no
+  config archaeology). No human and no agent argues about formatting again; the tool decides on save.
+- **Boundaries as lint.** The module boundaries above are worth nothing if anyone can quietly cross them.
+  Encode them as lint rules / import boundaries / architectural fitness functions so a crossing *fails a
+  check*, not *gets noticed in review* (which it won't, at AI output speed). Start with the principle and
+  the lint rules you can write today; the heavier custom-plugin / fitness-function tooling is **NOT-YET**
+  until the surface area earns it (Principle #2).
+- **Types at the boundary.** Where the stack has them, turn the strict knobs on early (`strict` mode,
+  no-implicit-any) — they're the cheapest enforced convention there is, and they're far more painful to
+  retrofit onto a large AI-generated codebase than to start with.
+## The ratchet holds the line (extends `quality-ratchet`)
+The mechanism for *keeping* any of this — module-boundary crossings, schema-drift, type violations,
+skipped tests — is the [`quality-ratchet`](quality-ratchet.md) already in BOSS's library: pick one
+countable number, baseline it, and let it move **only the right way**, gated in `/smoke`. The point of
+naming it here is the architectural application: **the gate holds the line, not the reviewer.** At AI
+output speed a human reviewer cannot be the thing standing between the codebase and a thousand small
+regressions — a "no-new-violations" baseline (Notion's eslint-seatbelt pattern) can. Don't restate the
+ratchet mechanics; reach for that practice and point it at an architecture metric.
+## One canonical context file (don't re-derive the rules)
+The conventions an agent *can't* lint — the why-we're-shaped-this-way, the domain language, the
+landmines — belong in **one canonical, version-controlled context file** (CLAUDE.md / the AGENTS.md
+standard), pruned ruthlessly. The 2026 finding is counterintuitive and worth holding: **the failure mode
+is over-length, not under-spec** — a bloated context file gets *ignored* by the model, which is exactly
+why BOSS keeps its wayfinding lean and JIT-loads the rest. This is [`context-discipline`](context-discipline.md)'s
+territory; named here only because architecture-at-scale lives or dies on whether the next agent inherits
+the decisions or re-invents them.
+## Altitude / JIT (right rigor, right rung)
+Not a Quickstart lecture, and not all at once. **Modular boundaries** matter from the first real build
+(MVP — clear module seams as features accumulate). **Migrations-as-code** arrives at the *first table*,
+not before — a prototype with mock data needs no migration log. **Conventions-as-code** earns its place
+the moment a *second* author touches the repo — and a second *agent* counts, which is sooner than most
+solo founders expect. The distributed-services question is a **V1→Scale** concern; bringing it up at MVP
+is the premature-ceremony failure (Principle #2).
+## Relationship to BOSS
+This extends [`quality-ratchet`](quality-ratchet.md) (the enforcement mechanism) and sits alongside
+[`git-workflow`](git-workflow.md) (the daily flow that merges these conventions safely),
+[`context-discipline`](context-discipline.md) (the one context file), and
+[`agent-security`](agent-security.md) (the human-gate on the irreversible — of which the schema migration
+is the canonical case). A CLI `boss migrate` / a scaffolded migrations directory is a *possible* DOWN
+later, once a real project hits its first table and the by-hand discipline has earned the machinery
+(Principle #1). The `mentor-architect` agent is the natural place to surface this JIT to a founder; that
+DOWN is **deferred** only to avoid a live edit collision, not on judgment. The org/scaling half of
+"surviving the climb" — DRI, give-away-your-Legos, stage breakpoints — is FEAT-023 thread 3 (the
+unauthored V1→Scale rung). See FEAT-023.

package/library/practices/ship-it-live.md ADDED Viewed

@@ -0,0 +1,149 @@
+---
+id: PRACTICE-ship-it-live
+type: practice
+owner: mentor-architect
+status: active
+host: stack-neutral
+provenance: distilled from the 2026-06-21 CD/deploy deep-research pass (SESSION-2026-06-20-cd-deploy-research — 21 sources, 25 claims adversarially verified 3-vote, 22 confirmed / 3 killed) — DORA/Accelerate 2022-2024 [EVIDENCE], Fowler ParallelChange [EVIDENCE], Willison lethal-trifecta + OWASP LLM Top 10, CVE-2025-48757 (Lovable) + the MoltBook breach as the named vibe-coded incidents — BOSS v0.92.0, FEAT-024
+---
+# Practice — Ship it live (the CD half: localhost is not shipped)
+> **The shape of the problem.** An app that only runs on `localhost` **is a pseudo app.** You cannot
+> prove pain, workflow fit, or willingness-to-pay on a thing no real user can reach — the validation
+> conscience ("what does this prove?") has no teeth if the artifact was never put where someone could
+> prove it. `git-workflow` covered **CI** (is `main` green, can two humans stand behind the merge);
+> this is **CD** — *is this where a real user can hit it, or just you?* The URL is the proof. Everything
+> below is in service of getting that URL early, keeping the deploy reversible, and not leaking your
+> users' data on the way out the door (which, in 2025-26, is exactly how the AI-built app fails).
+## Deploy early, deploy cheap, deploy reversible
+Get a real URL at **MVP**, not at launch. Pick the smallest viable host and pick
+reversible-and-cheap over impressive (Principle #5 — optionality: a private URL on a free tier
+preserves every later choice; a hand-rolled Kubernetes cluster forecloses them). The host is the
+project's call and the project's stack decides it (Principle #4) — Vercel, Fly, Railway, Cloudflare,
+Render, a $5 VPS — the *practice* carries the judgment, not the target.
+The instinct to wait — "it's not ready, I'll deploy when it's polished" — does not survive scrutiny.
+The "reliability investment is premature at MVP, just keep iterating locally" counter-argument was
+**adversarially tested and failed (killed 0-3)**: there is no good evidence that staying on localhost
+buys you anything. What it costs you is the only thing that matters this early — *contact with a real
+user*. Deploy-day surprises (env that only existed on your laptop, a build step that only you can run,
+a secret you hard-coded) are cheaper to find on day one with one user than at launch with a hundred.
+> **localhost is not shipped.** Reachability is the gate between a pseudo app and a real one. Ship the
+> ugly version to a real URL, then iterate *in the open*.
+## Secrets & authz at the boundary — the leg with teeth
+This is where the AI-built app actually fails in 2025-26, and it fails the same way every time: **the
+agent ships a frontend that talks to the database with a public key, trusting a security rule it never
+configured.** Read the bundle, get the key, read and write everything. The founder wrote no code, so
+the founder never saw the hole.
+This is not hypothetical — it's the best-evidenced finding in the whole research pass [EVIDENCE]:
+- **CVE-2025-48757 (Lovable)** — AI-generated frontends made direct calls to the database via the
+  public anon key, relying solely on row-level security the AI never set up. **303 endpoints across
+  170+ apps (~10.3% of those scanned)** leaked PII and third-party API keys. (CVSS is disputed — 8.26
+  vs 9.3, supplier-contested — so trust the *mechanism*, not the score.)
+- **MoltBook** — a hardcoded database key plus disabled row-level security leaked **1.5M API tokens and
+  35K emails**. The founder wrote no code at all.
+A 1.5M-credential leak from a founder who couldn't grade what the AI shipped **is the pseudo→real
+thesis made literal.** So the discipline is non-negotiable at the deploy boundary:
+- **No secrets in the client.** API keys, tokens, service credentials live in server-side env, never in
+  the shipped bundle. A key in frontend JS is public the moment you deploy.
+- **Enforce authz server-side *before* the first public URL.** If the app talks to a database with a
+  public/anon key, the row-level security or access rules are what stand between your users and the
+  internet — and the AI does **not** configure them by default. Verify they're on before you ship, not
+  after the breach.
+- **A human security gate at first deploy.** For a non-technical founder this is the single highest-value
+  check, because they can't spot the vuln themselves. BOSS already owns this surface — **don't restate
+  it here, run it:** `/red-team`'s pre-ship app-security pass scans the build output + git history for
+  shipped secrets (the gap `secrets-guard` deliberately does *not* cover), and
+  [`agent-security.md`](agent-security.md) carries the lethal-trifecta / egress mechanics (Willison;
+  OWASP ranks prompt-injection #1 and notes it cannot be fully solved within current LLM architecture).
+  This practice's job is to make sure that pass *happens at deploy time* — the one moment it pays off.
+## Rollback ≠ reversible: schema is the one-way door
+"I can roll back" is half-true, and the half that's false is the one that hurts. **Instant rollback
+restores only the application artifact** — it re-points the domain at a previous build. It does **not**
+revert your database, and usually not your env either (Vercel documents this limitation against its own
+rollback feature, which is why it's safe to trust). The code goes back; the migration that dropped a
+column does not.
+So shipping reversibly means two separate disciplines:
+- **Name the revert path before you deploy.** A deploy you can't undo is a one-way door. Know the one
+  command (or one click) that puts the last-good build back, and that it doesn't depend on the deploy
+  that just broke.
+- **Make schema changes backward-compatible — expand-migrate-contract** (Fowler, *ParallelChange*
+  [EVIDENCE]). Add the new column/table (expand), move code and data to it while both shapes work
+  (migrate), drop the old one only once nothing reads it (contract). Each phase is releasable and
+  rollback-safe on its own, so a code rollback never strands a database that already moved on. This is
+  [`scalable-architecture.md`](scalable-architecture.md)'s "schema = the one-way door" — **deploy is
+  *when* it pays off.** Cross-reference for the migration mechanics; the deploy-time teeth are here.
+## The honesty anchor (don't sell yourself the safe deploy)
+**DORA 2024 [EVIDENCE]: AI adoption correlated with *worse* software-delivery stability *and*
+throughput** — driven by larger batches. This is the deploy-time twin of `git-workflow`'s METR anchor:
+shipping faster with an agent does not make your deploys safer, and the felt speedup is not the
+instrument. The instrument is **measured** — change-failure rate and time-to-restore (DORA's stability
+metrics are first-class, not an afterthought to deploy-frequency).
+One honesty caveat that cuts the other way: it is tempting to claim "but small batches offset the AI
+penalty." The research **killed that inference (1-2)** — DORA does not conclude it. Keep batches small
+because `git-workflow` already earns it (review capacity), not because it's a proven antidote to the
+stability hit. State the finding; don't oversell the fix.
+## Preview-per-branch — a judgment, not the review primitive
+A preview URL per branch *can* be what the other human actually reviews, and several platforms make it
+nearly free. But this is the one place to resist the vendor pitch: **the claim that preview-per-branch
+tightens the review loop is platform-positioning, not independently proven** (it was the single
+counter-argument that survived verification in the research pass). For a two-person team it is often
+**over-ceremony** — the review capacity cap in `git-workflow` is the real constraint, and a preview env
+doesn't raise it.
+So: reach for preview environments when review *load* grows enough to earn them — multiple concurrent
+branches, a reviewer who isn't the author, a stakeholder who needs to click before merge. Not as a
+mandate at MVP. Right ceremony, right rung (Principle #2).
+## Stack-neutral capture (feed the loop)
+The host, the deploy command, the rollback path, the env-var boundary you settled on — that's a
+**stack-profile output of the learning loop** (Principle #4). Capture it as a `PRAC-NNN` (via
+`/practice`) or a stack profile so the *next* project of that kind starts from a known-good deploy
+recipe instead of rediscovering it. BOSS assumes no deploy target; it *learns* yours and carries it UP.
+## Altitude / JIT (don't front-load it)
+This is **not** a Quickstart lecture. A founder dropping an idea into `/prototype` runs it locally and
+that's correct — sketch freely, no deploy discipline. The reachability discipline earns its place at
+**MVP**, when the work is building-for-real and validation needs an artifact a real user can reach. The
+secrets-at-the-boundary check surfaces the first time the app talks to a database or holds a key; the
+schema discipline surfaces at the first migration; preview environments surface only when review load
+asks for them. Right ceremony, right rung (Principle #2).
+## Relationship to BOSS
+BOSS already ships the security *mechanism* this practice leans on — `/red-team`'s pre-ship pass (the
+shipped-secret scan), [`agent-security.md`](agent-security.md) (the deny-list floor, `secrets-guard`,
+the lethal-trifecta containment), and [`scalable-architecture.md`](scalable-architecture.md)
+(migrations-as-code). This practice is the **operating discipline that fires them at deploy time**, plus
+the deploy-specific judgment a pre-AI CD guide wouldn't have carried: the client-bundled-key trap as the
+*signature* AI-built failure, and rollback-is-not-reversibility. The runner is **`/ship`** — the
+deterministic "put it where a real user can hit it" verbs, with the secrets/authz pre-flight wired in as
+a *check, not a gate* (conscience-not-censor).
+**Reachable → discoverable.** Reachability is the *first* leg this practice closes; the next is the one the
+conscience otherwise never voices — *will anyone find it?* `/ship` carries a single demand voicing at the
+live moment (*"who's the first real user, and how do they hit this?"*), situation-not-person, pointing at
+`mentor-gtm` for depth — never a marketing nag. That's the distribution-leg asymmetry (IDEA-041) closed at
+the `/ship` moment rather than as an unprompted hook. See [`git-workflow.md`](git-workflow.md) (the CI half
+this extends), and FEAT-024.

package/library/practices/skill-authoring.md ADDED Viewed

@@ -0,0 +1,70 @@
+# Practice: Skill authoring — write skills the model actually triggers and follows
+> Adapted from Anthropic's own `skill-creator` skill via [RVW-013](../../docs/research/verdicts/RVW-013-skill-creator-authoring-discipline.md).
+> The wisdom, not the harness: the with/without eval machinery is deferred (see *What's left out*).
+> This is ground for anyone authoring a skill — BOSS authoring its own, and (UP candidate) a founder
+> authoring one in their scaffolded project.
+A skill is a behavior you're handing to a model, not a config file. It fails two ways: it never
+fires when it should (under-triggering), or it fires and the model can't follow it (rigid or vague).
+Three disciplines fix most of both.
+## 1. Explanatory over prescriptive
+Tell the model *why*, and it generalizes to the cases you didn't enumerate. Give it a wall of rules,
+and it follows them literally past the point of sense — and the skill rots the moment the model
+improves underneath it (the IDEA-014 problem: frozen behavior fighting a better model).
+> Yellow flag: if you're writing `ALWAYS` / `NEVER` in all caps, or nesting rigid step-structures,
+> stop. That's usually fear talking, not clarity. State the reasoning and trust the model to apply it.
+A handful of caps for genuine load-bearing invariants is fine. A skill *made of* them is a skill
+that doesn't trust its reader — and BOSS's voice assumes intelligence. (Re-read your own draft: most
+`ALWAYS` lines become a single "because" sentence.)
+## 2. Progressive disclosure — three levels of loading
+Context is a budget. Structure the skill so each level loads only when needed:
+1. **Metadata (the `description`)** — always in context, ~100 words. This is what the model reads to
+   *decide whether to trigger*. It earns the most care (see #3).
+2. **The SKILL.md body** — loaded only when the skill fires. Keep it tight (the source says <500
+   lines; most should be far shorter). This is the *how*.
+3. **Bundled resources** — templates, examples, reference files — loaded on demand from the body,
+   never up front.
+Don't pour the whole method into the description, and don't make the body re-explain what the
+description said. Each level does its own job once.
+## 3. Descriptions earn their triggers
+Under-triggering is the common failure: a useful skill that never fires because its description is
+shy. The fix is an *explicit*, specific description — name the trigger phrases, the situations, the
+adjacent cases — so the model recognizes the moment. The source skill calls this writing "pushy"
+descriptions.
+**The BOSS adaptation:** pushy ≠ shouting. A description earns its triggers by being *concrete*
+("when the user asks X, or is doing Y, or says any of …"), not by adding urgency words. List the
+real trigger phrases; name what it is *not* for (the boundary is as load-bearing as the trigger).
+Look at the existing skills — `/vet`, `/boss-learn`, `/consult` — for the pattern: a one-line *what*,
+then a precise *when*, then the inverse it's distinct from.
+## Self-check before shipping a skill
+A short read, not a harness:
+- **Trigger test:** read only the `description`. Would *you* know exactly when to fire it, and when
+  not to? If the boundary is fuzzy, the model's will be fuzzier.
+- **Rigidity test:** count the `ALWAYS`/`NEVER`/all-caps words. For each one: can it become a "because"?
+- **Level test:** is anything in the body that belongs in a bundled resource? Anything in the
+  description that belongs in the body?
+- **Voice test:** does it assume intelligence and never assume knowledge? (Hand to `voice-keeper` if
+  unsure.)
+## What's left out (deliberately)
+The source skill ships a **with-skill vs without-skill eval harness** (`workspace/iteration-N/eval-ID/`,
+parallel runs, scored comparison). BOSS does **not** adopt that yet — it's real ceremony, and it
+duplicates the question `/vet` and `conscience-evals/` already ask ("does this beat the baseline?").
+**Re-open** when a shipped skill's value is genuinely disputed and a careful read can't settle it;
+then a with/without comparison earns its weight. Logged in [IDEA-033](../../docs/ideas/IDEA-033-2026-rigor-and-bestpractice-gaps.md).

package/library/skills/.gitkeep ADDED Viewed

File without changes

package/library/skills/boss-learn/SKILL.md ADDED Viewed

@@ -0,0 +1,63 @@
+---
+name: boss-learn
+description: Route a proven pattern two ways — UP into the BOSS library as a reusable superset practice, or DOWN into this app as hardened core functionality. The judgment layer over `boss learn`. Usage - /boss-learn [what to promote]
+---
+# /boss-learn — the two-destination router
+PRINCIPLES #1: BOSS is always scaffolding, but at every natural breakpoint (a mode transition, a
+shipped feature, the third time the same work repeats) you **pause and sort the pattern two ways.**
+This skill is that router. It is **not** a one-way "promote to BOSS" — deciding UP vs DOWN *is* the
+work.
+## 0. Orient (silent)
+- Read `PRINCIPLES.md` (#1 especially) and `.boss/manifest.json` (which project/mode you're in).
+- Identify the concrete pattern: the file(s), prompt, workflow, convention, or token set in question.
+## 1. Decide the destination
+Ask one question if it's genuinely unclear; otherwise call it and say why.
+- **UP → BOSS library** when the pattern is *project-neutral and reusable*: a workflow, an agent or
+  skill shape, a hook, a best-practice doc, a memory seed. The test (PRINCIPLES #3): *could a sibling
+  project reuse this without copy-pasting code?* If yes, it's superset.
+- **DOWN → app core** when the pattern is *this product's actual functionality* that happens to be
+  living as scaffold/ad-hoc. It belongs in the app's own modules, with the app's own tests — not in BOSS.
+A pattern can route **both**: the generalized shape goes UP, the concrete implementation hardens DOWN.
+## 2a. Route UP
+1. **Generalize first.** Strip project specifics; replace them with `{{PLACEHOLDERS}}`. Domain logic
+   never lands in `library/` (PRINCIPLES: stack- and project-neutral only). Write/clean the file.
+2. **Pick a category:** `agents` · `skills` · `hooks` · `practices` · `memory-seed`.
+3. **Promote it:**
+   ```
+   boss learn <path-to-generalized-file-or-dir> --as <category> --note "<one line: what & why>"
+   ```
+   This copies it into `library/<category>/`, bumps `VERSION` + `package.json` (minor by default;
+   `--patch`/`--major`/`--version X.Y.Z` to override), and prepends a `registry/CHANGELOG.md` entry.
+4. **Sharpen the CHANGELOG prose** by hand — the auto entry is a stub. The CHANGELOG is what every
+   project reads via `/boss-sync`, so make it say what's new and why it matters.
+5. Tell the user: connected projects pull this via `boss sync` / `/boss-sync`.
+## 2b. Route DOWN
+No BOSS version change. This is product, not scaffold. Give concrete guidance (or do it, if asked):
+- Move the pattern from ad-hoc/scaffold into a **named, owned module/config** in the app.
+- Add the app's own tests around it; wire it into the app's real flow.
+- If a *generalizable shape* remains, note it for a follow-up UP — don't lock value into code (PRINCIPLES #3).
+## 3. Close the loop
+- Update `docs/RESUME.md` if this was a breakpoint worth recording.
+- One-line summary: what moved, which direction, the new BOSS version (if UP), the next step.
+## Rules
+- Deciding the direction is the point — never default to UP. Most app code routes DOWN.
+- Never put domain specifics in `library/`. Generalize or don't promote.
+- `boss learn` edits the BOSS **source** repo (found via the registry's self-hosted entry, or
+  `$BOSS_SRC`). Review its diff and commit deliberately — don't auto-commit BOSS.
+- One pattern per run. If the user names several, take the clearest first.

package/library/skills/boss-sync/SKILL.md ADDED Viewed

@@ -0,0 +1,48 @@
+---
+name: boss-sync
+description: Pull current BOSS practices into this project — bring the installed modes' skills/agents up to the latest version as a reviewed, narrated diff, then bump the project's BOSS pin. The judgment layer over `boss sync`. Usage - /boss-sync
+---
+# /boss-sync — bring this project current
+The distribution half of the learning loop (PRINCIPLES #1): improvements promoted UP into the BOSS
+library flow back DOWN into every connected project. This skill brings *this* project's BOSS-managed
+files (the skills + agents of its installed modes) up to the current version — reviewed, not blind.
+## 0. Orient (silent)
+- `boss status` — current mode, the project's BOSS pin, and whether newer practices exist.
+- Read `registry/CHANGELOG.md` from the BOSS source repo for **what changed since this project's pin** —
+  this is the narration you'll give the user (not just a file list).
+## 1. Preview
+Run `boss sync` (no flags). It lists each BOSS-managed file as `new`, `changed (N lines)`, or up to date,
+across all installed modes, and reconciles any stale mode label (e.g. an old `L0-sketch` pin → `L0-quickstart`).
+## 2. Review (the judgment)
+Before applying, for each **changed** file:
+- Read the project's current copy and the incoming version. Summarize what actually changes.
+- **Flag conflicts:** if the project edited a BOSS-managed file locally, a sync overwrites it. Call this
+  out by name and ask before clobbering. (v1 syncs BOSS-managed skills/agents only — see scope below.)
+- Tie changes back to the CHANGELOG entries so the user understands *why*, not just *what*.
+## 3. Apply
+- `boss sync --apply` — writes the new/changed files and bumps the project's `.boss` pin to current.
+- Then show `git diff` and let the user review and commit. The project is the source of truth for its
+  own history; BOSS just proposes the update.
+## Scope (v1)
+- Syncs only **BOSS-managed skills/agents** for installed modes.
+- Does **not** auto-merge user-editable files (`CLAUDE.md`, `.claude/settings.json`). If the CHANGELOG
+  implies those should change, surface it and let the user merge by hand.
+- New skills/agents added to a mode since the pin are pulled in; nothing is removed.
+## Rules
+- Review before `--apply`. Never overwrite a locally-edited managed file without flagging it first.
+- Narrate from the CHANGELOG — the user should learn what's new, not just see files move.
+- Don't commit for the user; hand them a clean `git diff` to review.

package/package.json ADDED Viewed

@@ -0,0 +1,49 @@
+{
+  "name": "bossbuild",
+  "version": "0.97.0",
+  "description": "BOSS (Build Out Solid Stuff) — a just-in-time startup incubator that cold-starts projects at the right level of agentic ceremony, grows them through modes (Quickstart → MVP → V1 → Scale), and mentors the founder. Make it real.",
+  "keywords": [
+    "Agentic",
+    "framework",
+    "humanetech"
+  ],
+  "homepage": "https://github.com/ajeshh/bossbuild#readme",
+  "bugs": {
+    "url": "https://github.com/ajeshh/bossbuild/issues"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/ajeshh/bossbuild.git"
+  },
+  "license": "MIT",
+  "author": "Ajesh Shah",
+  "type": "module",
+  "bin": {
+    "boss": "bin/boss"
+  },
+  "directories": {
+    "doc": "docs"
+  },
+  "files": [
+    "bin/",
+    "src/",
+    "stages/",
+    "library/",
+    "VERSION",
+    "PRINCIPLES.md",
+    "registry/CHANGELOG.md"
+  ],
+  "scripts": {
+    "link": "npm install -g .",
+    "gen:docs": "node scripts/gen-docs.js",
+    "check:wayfinding": "node scripts/check-wayfinding-drift.js",
+    "pack:preview": "npm pack --dry-run",
+    "eval:gate": "node docs/architecture/conscience-evals/runner.js",
+    "eval:judgment": "node docs/architecture/conscience-evals/judgment/replay.js",
+    "eval": "npm run eval:gate && npm run eval:judgment",
+    "regrade": "node docs/architecture/conscience-evals/judgment/regrade.js"
+  },
+  "engines": {
+    "node": ">=18"
+  }
+}