feed-the-machine 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. package/LICENSE +21 -0
  2. package/README.md +268 -0
  3. package/bin/generate-manifest.mjs +210 -0
  4. package/bin/install.mjs +114 -0
  5. package/ftm/SKILL.md +88 -0
  6. package/ftm-audit/SKILL.md +146 -0
  7. package/ftm-audit/references/protocols/PROJECT-PATTERNS.md +91 -0
  8. package/ftm-audit/references/protocols/RUNTIME-WIRING.md +66 -0
  9. package/ftm-audit/references/protocols/WIRING-CONTRACTS.md +135 -0
  10. package/ftm-audit/references/strategies/AUTO-FIX-STRATEGIES.md +69 -0
  11. package/ftm-audit/references/templates/REPORT-FORMAT.md +96 -0
  12. package/ftm-audit/scripts/run-knip.sh +23 -0
  13. package/ftm-audit.yml +2 -0
  14. package/ftm-brainstorm/SKILL.md +379 -0
  15. package/ftm-brainstorm/evals/evals.json +100 -0
  16. package/ftm-brainstorm/evals/promptfoo.yaml +109 -0
  17. package/ftm-brainstorm/references/agent-prompts.md +224 -0
  18. package/ftm-brainstorm/references/plan-template.md +121 -0
  19. package/ftm-brainstorm.yml +2 -0
  20. package/ftm-browse/SKILL.md +415 -0
  21. package/ftm-browse/daemon/browser-manager.ts +206 -0
  22. package/ftm-browse/daemon/bun.lock +30 -0
  23. package/ftm-browse/daemon/cli.ts +347 -0
  24. package/ftm-browse/daemon/commands.ts +410 -0
  25. package/ftm-browse/daemon/main.ts +357 -0
  26. package/ftm-browse/daemon/package.json +17 -0
  27. package/ftm-browse/daemon/server.ts +189 -0
  28. package/ftm-browse/daemon/snapshot.ts +519 -0
  29. package/ftm-browse/daemon/tsconfig.json +22 -0
  30. package/ftm-browse.yml +4 -0
  31. package/ftm-codex-gate/SKILL.md +302 -0
  32. package/ftm-codex-gate.yml +2 -0
  33. package/ftm-config/SKILL.md +310 -0
  34. package/ftm-config.default.yml +80 -0
  35. package/ftm-config.yml +2 -0
  36. package/ftm-council/SKILL.md +132 -0
  37. package/ftm-council/references/prompts/CLAUDE-INVESTIGATION.md +60 -0
  38. package/ftm-council/references/prompts/CODEX-INVESTIGATION.md +58 -0
  39. package/ftm-council/references/prompts/GEMINI-INVESTIGATION.md +58 -0
  40. package/ftm-council/references/prompts/REBUTTAL-TEMPLATE.md +57 -0
  41. package/ftm-council/references/protocols/PREREQUISITES.md +47 -0
  42. package/ftm-council/references/protocols/STEP-0-FRAMING.md +46 -0
  43. package/ftm-council.yml +2 -0
  44. package/ftm-dashboard.yml +4 -0
  45. package/ftm-debug/SKILL.md +146 -0
  46. package/ftm-debug/references/phases/PHASE-0-INTAKE.md +58 -0
  47. package/ftm-debug/references/phases/PHASE-1-TRIAGE.md +46 -0
  48. package/ftm-debug/references/phases/PHASE-2-WAR-ROOM-AGENTS.md +279 -0
  49. package/ftm-debug/references/phases/PHASE-3-TO-6-EXECUTION.md +436 -0
  50. package/ftm-debug/references/protocols/BLACKBOARD.md +86 -0
  51. package/ftm-debug/references/protocols/EDGE-CASES.md +103 -0
  52. package/ftm-debug.yml +2 -0
  53. package/ftm-diagram/SKILL.md +233 -0
  54. package/ftm-diagram.yml +2 -0
  55. package/ftm-executor/SKILL.md +657 -0
  56. package/ftm-executor/references/STYLE-TEMPLATE.md +73 -0
  57. package/ftm-executor/references/phases/PHASE-0-VERIFICATION.md +62 -0
  58. package/ftm-executor/references/phases/PHASE-2-AGENT-ASSEMBLY.md +34 -0
  59. package/ftm-executor/references/phases/PHASE-3-WORKTREES.md +38 -0
  60. package/ftm-executor/references/phases/PHASE-4-5-AUDIT.md +72 -0
  61. package/ftm-executor/references/phases/PHASE-4-DISPATCH.md +66 -0
  62. package/ftm-executor/references/phases/PHASE-5-5-CODEX-GATE.md +73 -0
  63. package/ftm-executor/references/protocols/DOCUMENTATION-BOOTSTRAP.md +36 -0
  64. package/ftm-executor/references/protocols/MODEL-PROFILE.md +44 -0
  65. package/ftm-executor/references/protocols/PROGRESS-TRACKING.md +66 -0
  66. package/ftm-executor/runtime/ftm-runtime.mjs +252 -0
  67. package/ftm-executor/runtime/package.json +8 -0
  68. package/ftm-executor.yml +2 -0
  69. package/ftm-git/SKILL.md +195 -0
  70. package/ftm-git/evals/evals.json +26 -0
  71. package/ftm-git/evals/promptfoo.yaml +75 -0
  72. package/ftm-git/hooks/post-commit-experience.sh +92 -0
  73. package/ftm-git/references/patterns/SECRET-PATTERNS.md +104 -0
  74. package/ftm-git/references/protocols/REMEDIATION.md +139 -0
  75. package/ftm-git/scripts/pre-commit-secrets.sh +110 -0
  76. package/ftm-git.yml +2 -0
  77. package/ftm-intent/SKILL.md +198 -0
  78. package/ftm-intent.yml +2 -0
  79. package/ftm-map.yml +2 -0
  80. package/ftm-mind/SKILL.md +986 -0
  81. package/ftm-mind/evals/promptfoo.yaml +142 -0
  82. package/ftm-mind/references/blackboard-schema.md +328 -0
  83. package/ftm-mind/references/complexity-guide.md +110 -0
  84. package/ftm-mind/references/event-registry.md +299 -0
  85. package/ftm-mind/references/mcp-inventory.md +296 -0
  86. package/ftm-mind/references/protocols/COMPLEXITY-SIZING.md +72 -0
  87. package/ftm-mind/references/protocols/MCP-HEURISTICS.md +32 -0
  88. package/ftm-mind/references/protocols/PLAN-APPROVAL.md +80 -0
  89. package/ftm-mind/references/reflexion-protocol.md +249 -0
  90. package/ftm-mind/references/routing/SCENARIOS.md +22 -0
  91. package/ftm-mind/references/routing-scenarios.md +35 -0
  92. package/ftm-mind.yml +2 -0
  93. package/ftm-pause/SKILL.md +133 -0
  94. package/ftm-pause/references/protocols/SKILL-RESTORE-PROTOCOLS.md +186 -0
  95. package/ftm-pause/references/protocols/VALIDATION.md +80 -0
  96. package/ftm-pause.yml +2 -0
  97. package/ftm-researcher.yml +2 -0
  98. package/ftm-resume/SKILL.md +166 -0
  99. package/ftm-resume/references/protocols/VALIDATION.md +172 -0
  100. package/ftm-resume.yml +2 -0
  101. package/ftm-retro/SKILL.md +189 -0
  102. package/ftm-retro/references/protocols/SCORING-RUBRICS.md +89 -0
  103. package/ftm-retro/references/templates/REPORT-FORMAT.md +109 -0
  104. package/ftm-retro.yml +2 -0
  105. package/ftm-routine.yml +4 -0
  106. package/ftm-state/blackboard/context.json +23 -0
  107. package/ftm-state/blackboard/experiences/index.json +9 -0
  108. package/ftm-state/blackboard/patterns.json +6 -0
  109. package/ftm-state/schemas/context.schema.json +130 -0
  110. package/ftm-state/schemas/experience-index.schema.json +77 -0
  111. package/ftm-state/schemas/experience.schema.json +78 -0
  112. package/ftm-state/schemas/patterns.schema.json +44 -0
  113. package/ftm-upgrade/SKILL.md +153 -0
  114. package/ftm-upgrade/scripts/check-version.sh +76 -0
  115. package/ftm-upgrade/scripts/upgrade.sh +143 -0
  116. package/ftm-upgrade.yml +2 -0
  117. package/ftm.yml +2 -0
  118. package/install.sh +102 -0
  119. package/package.json +74 -0
  120. package/uninstall.sh +25 -0
@@ -0,0 +1,96 @@
1
+ # Audit Report Format
2
+
3
+ The final output structure for a completed ftm-audit run.
4
+
5
+ ---
6
+
7
+ ## Summary Report
8
+
9
+ ```
10
+ ## FTM Audit Report — [YYYY-MM-DD HH:MM]
11
+
12
+ ### Layer 1: Static Analysis (knip)
13
+ - Findings: [N]
14
+ - [FINDING_TYPE] file:line — description
15
+ - [FINDING_TYPE] file:line — description
16
+
17
+ ### Layer 2: Adversarial Audit
18
+ - Findings: [N]
19
+ - [FINDING_TYPE] file:line — description (Dimension N FAIL)
20
+ - [FINDING_TYPE] file:line — description (Dimension N FAIL)
21
+
22
+ ### Layer 3: Auto-Fix Results
23
+ - Fixed: [N]
24
+ - Manual intervention needed: [N]
25
+ - [list each fix applied with result]
26
+
27
+ ### Final Status: PASS / FAIL
28
+ - Remaining issues: [list if any]
29
+ ```
30
+
31
+ ---
32
+
33
+ ## Detailed Changelog
34
+
35
+ Produced alongside the summary report when Layer 3 runs.
36
+
37
+ ```
38
+ ### FTM Audit Changelog — [YYYY-MM-DD HH:MM]
39
+
40
+ #### Findings
41
+ | # | Type | Location | Description |
42
+ |---|------|----------|-------------|
43
+ | 1 | UNWIRED_COMPONENT | src/components/Widget.tsx | Imported but not rendered in Dashboard |
44
+ | 2 | ORPHAN_ROUTE | src/views/Settings.tsx | No route config entry |
45
+
46
+ #### Fixes Applied
47
+ | # | Finding | Fix | Verified |
48
+ |---|---------|-----|----------|
49
+ | 1 | UNWIRED_COMPONENT Widget | Added <Widget /> to Dashboard.tsx:47 | ✅ PASS |
50
+ | 2 | ORPHAN_ROUTE Settings | Added /settings route to router.tsx:23 | ✅ PASS |
51
+
52
+ #### Manual Intervention Required
53
+ | # | Finding | Reason | Suggested Action |
54
+ |---|---------|--------|-----------------|
55
+ | (none) | | | |
56
+
57
+ #### Final Status: PASS (0 remaining issues)
58
+ ```
59
+
60
+ ---
61
+
62
+ ## Finding Formats by Layer and Phase
63
+
64
+ ### Layer 1 (knip) Finding Format
65
+
66
+ ```
67
+ Layer 1 findings:
68
+ - [UNUSED_FILE] src/components/OldWidget.tsx — not imported anywhere
69
+ - [UNUSED_EXPORT] src/utils/helpers.ts:42 — export `formatDate` not used
70
+ - [UNUSED_DEP] package.json — `lodash` listed but never imported
71
+ - [UNLISTED_DEP] src/api/client.ts — imports `axios` but it's not in package.json
72
+ ```
73
+
74
+ ### Layer 2 (Adversarial) Finding Format
75
+
76
+ ```
77
+ Layer 2 findings:
78
+ - [UNWIRED_COMPONENT] src/components/NewWidget.tsx — imported in Dashboard.tsx:5 but never rendered in JSX (Dimension 2 FAIL)
79
+ - [ORPHAN_ROUTE] src/views/SettingsView.tsx — no route in router config points to this view (Dimension 3 FAIL)
80
+ - [DEAD_STORE_FIELD] src/store/userSlice.ts:23 — `userPreferences` written in reducer but never read by any selector (Dimension 4 FAIL)
81
+ - [UNCALLED_API] src/api/billing.ts:15 — `fetchInvoices()` exported but never called (Dimension 5 FAIL)
82
+ ```
83
+
84
+ **Requirement:** Every finding must include file:line evidence. "I think this might be unused" is not acceptable — show the grep results or the missing link in the chain.
85
+
86
+ ### Phase 3 (Runtime) Finding Format
87
+
88
+ When runtime-only findings are present (passed Layers 1-2 but failed Phase 3):
89
+
90
+ ```
91
+ Phase 3 (Runtime) findings:
92
+ - [RUNTIME_FAIL] /analytics — page returns 404 despite route registered in router.tsx:18
93
+ - [RUNTIME_WARN] /settings — route renders but <UserPreferences /> missing from ARIA tree
94
+ ```
95
+
96
+ Label these as `runtime-only` so developers know they won't be caught by future static checks alone.
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env bash
2
+ # run-knip.sh — Run knip and return structured findings
3
+ # Exit 0 if clean, 1 if findings exist
4
+
5
+ set -euo pipefail
6
+
7
+ # Check for package.json
8
+ if [ ! -f "package.json" ]; then
9
+ echo '{"skipped": true, "reason": "No package.json found"}'
10
+ exit 0
11
+ fi
12
+
13
+ # Run knip with JSON reporter
14
+ OUTPUT=$(npx knip --reporter json 2>/dev/null || true)
15
+
16
+ # Check if output is empty or just '{}'
17
+ if [ -z "$OUTPUT" ] || [ "$OUTPUT" = "{}" ] || [ "$OUTPUT" = '{"files":[],"issues":[]}' ]; then
18
+ echo '{"clean": true, "files": [], "issues": []}'
19
+ exit 0
20
+ fi
21
+
22
+ echo "$OUTPUT"
23
+ exit 1
package/ftm-audit.yml ADDED
@@ -0,0 +1,2 @@
1
+ name: ftm-audit
2
+ description: Dual-purpose wiring audit that verifies all code is actually connected to the running application. Combines static analysis (knip) with adversarial LLM audit and auto-fixes anything it finds. Use when user says "audit", "wiring check", "verify wiring", "dead code", "check imports", "unused code", "find dead code", or "audit wiring". Also auto-invoked by ftm-executor after each task.
@@ -0,0 +1,379 @@
1
+ ---
2
+ name: ftm-brainstorm
3
+ description: Research-powered Socratic brainstorming that dispatches parallel agents to search the web and GitHub for real-world patterns, then synthesizes findings into actionable suggestions with citations. Use this skill whenever the user wants to brainstorm, explore ideas, think through a feature, plan a project, or flesh out a concept before building. Also triggers when the user pastes a large block of text (notes, prior brainstorm, meeting transcript, spec draft, stream-of-consciousness dump) and wants to turn it into something buildable — phrases like "help me build this", "turn this into a plan", "here's what I've been thinking", or just a big paste followed by "what do you think?" or "go". Triggers on "brainstorm", "help me think through", "I have an idea for", "how should I approach", "let's explore", "what if we built", "I'm thinking about", "help me figure out", or any conversation where the user has a concept they want to develop before writing code. Even vague ideas like "I want to build something that..." or "what's the best way to..." should trigger this skill.
4
+ ---
5
+
6
+ ## Events
7
+
8
+ ### Emits
9
+ - `plan_generated` — when Phase 3 completes and plan is saved
10
+ - Payload: `{ plan_path, plan_title, task_count, wave_count }`
11
+ - `task_completed` — when the full brainstorm-to-plan cycle finishes
12
+ - Payload: `{ task_title, plan_path, duration_ms }`
13
+
14
+ ### Listens To
15
+ - `task_received` — begin ideation when ftm-mind routes an incoming task for exploration
16
+ - Expected payload: `{ task_description, plan_path, wave_number, task_number }`
17
+ - `research_complete` — consume structured findings from ftm-researcher for the current research sprint
18
+ - Expected payload: `{ query, mode, findings_count, consensus_count, contested_count, unique_count, sources_count, council_used, duration_ms }`
19
+
20
+ ## Config Read
21
+
22
+ Before dispatching any agents, read `~/.claude/ftm-config.yml`:
23
+ - Use the `planning` model from the active profile for all research agents
24
+ - Example: if profile is `balanced`, agents get `model: opus`
25
+ - If config missing, use session default
26
+
27
+ ## Research Sprint Dispatch
28
+
29
+ Each research sprint invokes ftm-researcher rather than dispatching agents directly.
30
+
31
+ Interface:
32
+ - Pass: { research_question: [derived from current turn], context_register: [all prior findings], depth_mode: [based on turn number] }
33
+ - Receive: { findings, disagreement_map, confidence_scores }
34
+
35
+ Depth mode mapping:
36
+ - Turns 1-2 (BROAD): ftm-researcher quick mode (3 finders)
37
+ - Turns 3-5 (FOCUSED): ftm-researcher standard mode (7 finders + reconciler)
38
+ - Turns 6+ (IMPLEMENTATION): ftm-researcher deep mode (full pipeline with council)
39
+
40
+ The brainstorm skill consumes the researcher's structured output and weaves it into:
41
+ - 3-5 numbered suggestions with evidence and source URLs
42
+ - A recommended option with rationale
43
+ - Challenges based on contested claims from the disagreement map
44
+ - Targeted questions based on research gaps
45
+
46
+ ## Blackboard Read
47
+
48
+ Before starting, load context from the blackboard:
49
+ 1. Read `~/.claude/ftm-state/blackboard/context.json` — check current_task, recent_decisions, active_constraints
50
+ 2. Read `~/.claude/ftm-state/blackboard/experiences/index.json` — filter by task_type "feature"/"investigation"
51
+ 3. Load top 3-5 matching experience files for past brainstorm lessons
52
+ 4. Read `~/.claude/ftm-state/blackboard/patterns.json` — check execution_patterns and user_behavior
53
+
54
+ If missing or empty, proceed without.
55
+
56
+ ---
57
+
58
+ # THE CORE LOOP
59
+
60
+ This skill is a **multi-turn research conversation**. Every single turn after the first follows the same cycle. There are no shortcuts, no collapsing turns, no "let me just generate the plan now."
61
+
62
+ ```
63
+ EVERY TURN (after initial intake):
64
+ 1. RESEARCH SPRINT — invoke ftm-researcher with context
65
+ 2. SYNTHESIZE — merge findings into suggestions with evidence
66
+ 3. CHALLENGE — push back on weak assumptions, surface trade-offs
67
+ 4. ASK — 1-2 targeted questions to extract more from the user
68
+ 5. >>> STOP <<< — wait for the user. Do NOT continue.
69
+ ```
70
+
71
+ The research sprints get progressively deeper. The questions get progressively sharper. Each cycle builds on everything before it. The goal is to extract the user's complete vision AND ground it in real-world evidence before generating any plan.
72
+
73
+ **You maintain a CONTEXT REGISTER** — a running mental document of everything learned so far. Every research sprint receives this register so agents don't re-search old ground. After each turn, append what you learned.
74
+
75
+ **Research depth escalates automatically:**
76
+ - **Turns 1-2: BROAD** — map the landscape, major approaches, who's done this
77
+ - **Turns 3-5: FOCUSED** — drill into the user's chosen direction, real trade-offs, failure modes
78
+ - **Turns 6+: IMPLEMENTATION** — concrete libraries, code patterns, integration specifics
79
+
80
+ ---
81
+
82
+ # PHASE 0: REPO SCAN (automatic, silent)
83
+
84
+ Run this in the background before your first response. Do not ask.
85
+
86
+ Spawn an **Explore** agent (subagent_type: Explore):
87
+ ```
88
+ Analyze the current repository: project type, tech stack, architecture,
89
+ patterns in use, existing infrastructure, scale indicators.
90
+ Focus on what's relevant for proposing new features or architectural changes.
91
+ ```
92
+
93
+ Store as your project context. Reference throughout all phases. If not in a git repo, skip and ask about stack during intake.
94
+
95
+ ---
96
+
97
+ # PHASE 1: INTAKE
98
+
99
+ Detect which path you're on:
100
+
101
+ ## Path A: Fresh Idea (short/vague message)
102
+
103
+ **Turn 1 ONLY:** Ask 1-2 questions to understand the core idea. What is it, who is it for, what problem does it solve. One question at a time. If the opening message covers some of these, skip ahead.
104
+
105
+ **>>> STOP. Wait for response. <<<**
106
+
107
+ **Turn 2:** Take the user's answer. NOW run your first research sprint (3 agents, BROAD depth — see below). Synthesize, challenge, ask 1-2 more questions about architecture and constraints. Propose specific options from the research rather than open-ended questions.
108
+
109
+ **>>> STOP. Wait for response. <<<**
110
+
111
+ **Turn 3+:** You're now in the core loop. Every turn from here follows the cycle: research sprint -> synthesize -> challenge -> ask -> STOP.
112
+
113
+ ## Path B: Brain Dump (large paste, notes, transcript)
114
+
115
+ **Turn 1:** Parse the entire paste. Extract: decisions already made, open questions, assumptions to validate, contradictions, gaps. Present structured summary. Ask for confirmation + any gaps (success criteria, v1 scope, non-negotiables). Do NOT ask basic questions already answered in the paste.
116
+
117
+ **>>> STOP. Wait for confirmation. <<<**
118
+
119
+ **Turn 2:** Take the confirmation. Run first research sprint in BRAIN DUMP MODE (agents search for each specific architectural claim from the paste). Present novelty map. Synthesize, challenge, ask.
120
+
121
+ **>>> STOP. Wait for response. <<<**
122
+
123
+ **Turn 3+:** Core loop continues.
124
+
125
+ ---
126
+
127
+ # DISCUSS MODE
128
+
129
+ When the user provides a clear, specific spec or feature description (not a vague idea), skip broad research and go straight to targeted analysis.
130
+
131
+ ## Detection
132
+
133
+ Discuss mode activates when:
134
+ - The user's input is 200+ words with specific technical details
135
+ - The user says "I know what I want to build" or "here's my spec" or "discuss this"
136
+ - The input contains file paths, function names, or architecture details
137
+ - The user explicitly requests "discuss" rather than "brainstorm"
138
+
139
+ ## Flow
140
+
141
+ Instead of the standard brainstorm research → synthesis → suggestions flow:
142
+
143
+ 1. **Parse the spec** — Extract: what's being built, key components, tech stack, constraints
144
+ 2. **Identify gray areas** — Find the parts that aren't specified:
145
+ - Edge cases not mentioned
146
+ - Error handling not specified
147
+ - Performance implications not considered
148
+ - Security concerns not addressed
149
+ - Integration points not defined
150
+ 3. **Ask targeted questions** — Present 3-5 specific questions about the gray areas:
151
+ ```
152
+ Your spec is clear on [X, Y, Z]. A few gray areas to nail down:
153
+
154
+ 1. [Edge case question] — e.g., "What happens when the user submits while offline?"
155
+ 2. [Error handling question] — e.g., "Should failed API calls retry or show an error?"
156
+ 3. [Performance question] — e.g., "Expected data volume? 100 items or 100K?"
157
+ 4. [Security question] — e.g., "Who should have access to this endpoint?"
158
+ 5. [Integration question] — e.g., "Does this need to sync with the existing auth system?"
159
+ ```
160
+ 4. **Refine based on answers** — Each answer narrows the spec. After 2-3 rounds of Q&A, the spec should be implementation-ready.
161
+ 5. **Output: implementation-ready spec** — Not a brainstorm document, but a tight spec that can feed directly into plan generation.
162
+
163
+ ## Gray Area Categories by Feature Type
164
+
165
+ | Feature Type | Common Gray Areas |
166
+ |---|---|
167
+ | API endpoint | Auth, rate limiting, pagination, error codes, versioning |
168
+ | UI component | Loading states, empty states, error states, accessibility, responsive |
169
+ | Data pipeline | Failure modes, retry logic, idempotency, monitoring, backpressure |
170
+ | Integration | Auth flow, webhook handling, rate limits, data mapping, error recovery |
171
+ | Config change | Rollback plan, feature flags, gradual rollout, monitoring |
172
+
173
+ ---
174
+
175
+ # PHASE 2: RESEARCH + CHALLENGE LOOP
176
+
177
+ This is the heart of the skill. Unlimited turns. Each one follows the cycle.
178
+
179
+ ## Step 1: Dispatch Research Sprint
180
+
181
+ Every turn, read `references/agent-prompts.md` and spawn **3 parallel agents** (subagent_type: general-purpose, model: from ftm-config `planning` profile). Each agent gets:
182
+
183
+ 1. **Project context** from Phase 0
184
+ 2. **Full context register** — everything learned across ALL prior turns
185
+ 3. **Research depth level** for this turn (broad/focused/implementation)
186
+ 4. **Previous findings summary** so they don't re-search
187
+ 5. **This turn's specific research question** — derived from what the user just said
188
+ 6. **Brain dump claims** if Path B
189
+
190
+ The 3 agents search from different vectors:
191
+ - **Web Researcher** — blog posts, case studies, architectural write-ups
192
+ - **GitHub Explorer** — repos, code patterns, open-source implementations
193
+ - **Competitive Analyst** — products, tools, market gaps, user complaints
194
+
195
+ Each turn's research question should be DIFFERENT from the last. The user's response reveals new angles, constraints, or decisions — use those to formulate new, more specific search queries. If the user chose approach A over B, this turn's research digs into A's implementation details, not the broad landscape again.
196
+
197
+ ## Step 2: Synthesize into 3-5 Suggestions
198
+
199
+ Once agents return, merge findings into **3-5 numbered suggestions**. Lead with your recommendation.
200
+
201
+ Each suggestion needs:
202
+ 1. **The suggestion** — concrete and actionable
203
+ 2. **Real-world evidence** — which search results back this up, with URLs
204
+ 3. **Why this matters** — specific advantage for this project
205
+ 4. **Trade-off** — what you give up
206
+
207
+ Label suggestion #1 as **RECOMMENDED** with a "Why I'd pick this" rationale.
208
+
209
+ If research was thin, present fewer suggestions. Quality over quantity. If all 3 agents returned weak results, be honest: "Research didn't surface strong prior art — this might be genuinely novel, or we should reframe the search."
210
+
211
+ **Brain dump mode:** Present a **Novelty Map** table before suggestions:
212
+
213
+ | Brain Dump Claim | Verdict | Evidence |
214
+ |---|---|---|
215
+ | [claim] | Solved / Partially Solved / Novel | [link or explanation] |
216
+
217
+ ## Step 3: Challenge and Extract
218
+
219
+ After suggestions, challenge the user's thinking. Pick 2-3 challenge patterns:
220
+
221
+ - **"Have you considered..."** — surface a pattern they may not know about
222
+ - **"What happens when..."** — probe edge cases and scaling
223
+ - **"The evidence suggests..."** — when research contradicts an assumption
224
+ - **"A simpler approach might be..."** — when they're over-engineering
225
+ - **"Users of [product] complained about..."** — inject real feedback
226
+
227
+ **YAGNI instinct:** Actively look for scope to cut. If research shows successful products launched with less, argue for smaller v1 scope.
228
+
229
+ ## Step 4: Ask 1-2 Targeted Questions + Invite Continuation
230
+
231
+ Then ask **1-2 questions** that will make the next research sprint more productive. The questions should:
232
+ - Narrow scope based on what research surfaced
233
+ - Reveal constraints research can't discover
234
+ - Test commitment to specific approaches
235
+ - Uncover success criteria and non-negotiables
236
+
237
+ Prefer multiple-choice when the answer space is bounded. Each question should unlock a NEW research vector for next turn.
238
+
239
+ **After your questions, always signal that more depth is available.** Something like: "Answer these and I'll run another research sprint focused on [next topic]. Or if you feel we've covered enough ground, just say the word and we'll move to planning." The user should never feel like the brainstorm is wrapping up unless THEY decide it is. Your default posture is: there's always more to explore.
240
+
241
+ ## Step 5: STOP
242
+
243
+ **>>> STOP. Do NOT continue to the next turn. Wait for the user. <<<**
244
+
245
+ This is non-negotiable. The user's response is the input for the next research sprint. Without it, the next sprint has nothing new to search for.
246
+
247
+ ---
248
+
249
+ ## Feature-Type Detection
250
+
251
+ When you learn enough to classify the feature, add type-specific questions. Pick 2-3 unknowns, don't dump everything.
252
+
253
+ | Type | Signals | Key Questions |
254
+ |---|---|---|
255
+ | UI/Frontend | "page", "component", "dashboard" | Layout density? Responsive approach? Loading/empty/error states? |
256
+ | API/Backend | "endpoint", "API", "service" | REST vs GraphQL? Auth mechanism? Pagination strategy? |
257
+ | Data/Storage | "database", "store", "persist" | SQL vs NoSQL? Read/write ratio? Consistency requirements? |
258
+ | Integration | "connect to", "sync with" | Push/pull/both? Real-time or batch? Retry handling? |
259
+ | Automation | "automate", "trigger", "schedule" | Trigger mechanism? Failure notification? Idempotency? |
260
+ | CLI Tool | "command", "CLI", "terminal" | Interactive or not? Output format? Config file approach? |
261
+ | AI/ML | "AI", "model", "generate", "LLM" | Which model? Latency tolerance? Fallback? Cost ceiling? |
262
+
263
+ ---
264
+
265
+ ## When to Suggest Phase 3
266
+
267
+ You may SUGGEST moving to plan generation when:
268
+ - User explicitly says "ok I think I know what I want", "let's plan", "I'm ready" or similar
269
+ - Research returns diminishing returns AND user seems satisfied (same patterns keep appearing across 2+ sprints)
270
+ - You've covered: core architecture, key technical decisions, data model, integrations AND the user isn't raising new questions
271
+
272
+ **Default posture: keep exploring.** Never suggest Phase 3 just because a certain number of turns have passed. Some ideas need 3 turns, some need 15. The user decides when they're done, not you. If research is thin on a topic, try reformulating the search before suggesting you've exhausted the space.
273
+
274
+ **HARD GATE: The user must explicitly approve.** Present a brief summary:
275
+
276
+ ```
277
+ Here's what I think we've landed on:
278
+
279
+ **Building:** [one sentence]
280
+ **Core approach:** [recommended architecture/pattern]
281
+ **Key decisions:** [2-3 bullets]
282
+ **Scope for v1:** [what's in, what's deferred]
283
+
284
+ Ready to turn this into an implementation plan, or more to explore?
285
+ ```
286
+
287
+ If they say yes, proceed. If they raise new questions, stay in Phase 2.
288
+
289
+ ---
290
+
291
+ # PHASE 3: PLAN GENERATION
292
+
293
+ Read `references/plan-template.md` for the full template and rules. Present the plan incrementally (vision -> tasks -> agents/waves), getting approval at each step.
294
+
295
+ ---
296
+
297
+ ## Relationship to superpowers:brainstorming
298
+
299
+ - **ftm-brainstorm** (this): Idea exploration with live research. User is figuring out WHAT to build.
300
+ - **superpowers:brainstorming**: Design/spec work. User knows what they're building, needs HOW.
301
+
302
+ If user already completed superpowers:brainstorming, point to ftm-executor instead. If user explicitly invokes this skill, always run it.
303
+
304
+ ---
305
+
306
+ ## Context Compression
307
+
308
+ After turn 5 in a brainstorm session, earlier turns start consuming significant context. Apply compression to maintain quality in later turns.
309
+
310
+ ### Trigger
311
+
312
+ - Turns 1-5: No compression. Full fidelity.
313
+ - Turn 6+: Compress turns 1 through (current - 3). Keep the 3 most recent turns at full fidelity.
314
+
315
+ ### Compression Strategy
316
+
317
+ For each compressed turn, replace the full content with a summary:
318
+
319
+ ```
320
+ [Turn N summary]
321
+ - Topic: [what was discussed]
322
+ - Key decisions: [bullet list of decisions made]
323
+ - Open questions resolved: [what was answered]
324
+ - Artifacts produced: [any specs, diagrams, code snippets referenced]
325
+ ```
326
+
327
+ ### What to Preserve in Summaries
328
+
329
+ - Decisions and their rationale (WHY something was decided)
330
+ - Constraints discovered
331
+ - Requirements confirmed by the user
332
+ - Technical choices made
333
+
334
+ ### What to Drop
335
+
336
+ - Exploratory tangents that were abandoned
337
+ - Research citations already synthesized
338
+ - Verbose explanations of options not chosen
339
+ - Repeated context that's already captured in later turns
340
+
341
+ ### Implementation
342
+
343
+ This is implemented at the skill level, not via hooks. When presenting a response at turn 6+:
344
+ 1. Mentally compress old turns using the strategy above
345
+ 2. Reference compressed summaries when needed
346
+ 3. Keep recent turns verbatim for conversational continuity
347
+ 4. If the user references something from a compressed turn, expand it on demand
348
+
349
+ ---
350
+
351
+ ## Session State (for ftm-pause/resume)
352
+
353
+ When paused, the following state must be capturable so ftm-resume can pick up exactly where you left off:
354
+
355
+ - **Phase tracking**: current phase (0/1/2/3), path (A/B), turn number, research depth level
356
+ - **Phase 0**: full repo scan results (or "skipped — no git repo")
357
+ - **Phase 1**: original idea (verbatim), brain dump extraction if Path B, all user answers per round
358
+ - **Phase 2**: every completed turn's suggestions with evidence/URLs, every challenge and response, every question and answer, accumulated decisions, the current direction, context register contents
359
+ - **Phase 3**: which sections presented/approved, plan content so far, plan file path if saved
360
+
361
+ This state is what ftm-pause captures and ftm-resume restores. Keep it current as you go.
362
+
363
+ ## Blackboard Write
364
+
365
+ After completing, update:
366
+ 1. `~/.claude/ftm-state/blackboard/context.json`:
367
+ - Set current_task status to "complete"
368
+ - Append decision summary to recent_decisions (cap at 10)
369
+ - Update session_metadata.skills_invoked and last_updated
370
+ 2. Write experience file to `~/.claude/ftm-state/blackboard/experiences/YYYY-MM-DD_task-slug.json`:
371
+ - task_type: "feature" or "investigation"
372
+ - feature_type: detected type (UI, API, etc.)
373
+ - architectural_direction: the approach chosen
374
+ - research_quality: how useful the research sprints were (high/medium/low)
375
+ - turns_to_resolution: how many Phase 2 turns before Phase 3
376
+ - tags: keywords for future matching
377
+ 3. Update `experiences/index.json` with the new entry
378
+ 4. Emit `plan_generated` with `{ plan_path, plan_title, task_count, wave_count }` (if Phase 3 completed)
379
+ 5. Emit `task_completed` with `{ task_title, plan_path, duration_ms }`
@@ -0,0 +1,100 @@
1
+ {
2
+ "skill_name": "ftm-brainstorm",
3
+ "evals": [
4
+ {
5
+ "id": 0,
6
+ "name": "fresh-idea-intake",
7
+ "prompt": "I'm thinking about building an app that helps people find study buddies at their university. Like Tinder but for studying.",
8
+ "expected_output": "Phase 0 repo scan launched in background, 1-2 intake questions, hard STOP",
9
+ "files": [],
10
+ "assertions": [
11
+ {"name": "one_or_two_questions", "description": "Asks 1-2 questions max, not a wall of questions"},
12
+ {"name": "no_research_sprint_turn1", "description": "Does NOT dispatch research agents on the very first turn — intake only"},
13
+ {"name": "hard_stop", "description": "Ends with a question and waits — does NOT proceed to synthesize or generate suggestions"},
14
+ {"name": "repo_scan_launched", "description": "Mentions or silently launches a repo/codebase scan agent in background"}
15
+ ]
16
+ },
17
+ {
18
+ "id": 1,
19
+ "name": "fresh-idea-turn2-research",
20
+ "prompt": "It's for college students who want to find people in the same classes to study with. Matching based on courses, study style, and schedule availability.",
21
+ "expected_output": "First research sprint dispatched (3 agents), synthesized suggestions with citations, challenge, 1-2 questions, STOP",
22
+ "files": [],
23
+ "multi_turn_context": "Turn 2. Turn 1: user said 'building study buddy app like Tinder for studying', skill asked intake questions, user now responds with details.",
24
+ "assertions": [
25
+ {"name": "three_agents_dispatched", "description": "Dispatches 3 parallel research agents (web, github, competitive) — not fewer"},
26
+ {"name": "real_citations", "description": "At least 3 unique URLs to real repos/articles/products in the synthesis"},
27
+ {"name": "suggestions_with_evidence", "description": "Presents numbered suggestions (3-5) with real-world evidence backing each one"},
28
+ {"name": "recommendation_labeled", "description": "Suggestion #1 is labeled RECOMMENDED with rationale"},
29
+ {"name": "challenge_present", "description": "Includes at least one challenge/pushback after suggestions"},
30
+ {"name": "ends_with_question", "description": "Ends with 1-2 targeted questions to drive next research sprint"},
31
+ {"name": "hard_stop", "description": "Does NOT continue past the questions — waits for user response"},
32
+ {"name": "depth_is_broad", "description": "Research queries are landscape-level (major approaches, who's done this) not implementation-specific"}
33
+ ]
34
+ },
35
+ {
36
+ "id": 2,
37
+ "name": "turn3-deeper-research",
38
+ "prompt": "I like option 2 — the React Native approach with Firebase. But I'm worried about the matching algorithm complexity. Also we need to handle the cold-start problem when a new university joins.",
39
+ "expected_output": "Second research sprint (deeper, focused on RN+Firebase+matching), new suggestions building on prior, new challenges, new questions",
40
+ "files": [],
41
+ "multi_turn_context": "Turn 3. Prior turns: user described study buddy app, first research sprint found 5 approaches, user now picks one and raises two specific concerns.",
42
+ "assertions": [
43
+ {"name": "three_agents_dispatched", "description": "Dispatches 3 parallel research agents again — every turn gets a research sprint"},
44
+ {"name": "research_is_deeper", "description": "Search queries target matching algorithms and cold-start specifically, NOT broad 'study buddy app' landscape again"},
45
+ {"name": "builds_on_prior", "description": "References prior turn's findings — does not re-present the same 5 approaches"},
46
+ {"name": "new_citations", "description": "At least 2 URLs not seen in prior turns — fresh research, not recycled"},
47
+ {"name": "addresses_user_concerns", "description": "Suggestions specifically address matching algorithm complexity AND cold-start problem"},
48
+ {"name": "challenge_present", "description": "Challenges the user on at least one assumption or pushes back on scope"},
49
+ {"name": "ends_with_question", "description": "Ends with 1-2 questions that unlock the NEXT research vector"},
50
+ {"name": "hard_stop", "description": "Does NOT continue past the questions"}
51
+ ]
52
+ },
53
+ {
54
+ "id": 3,
55
+ "name": "brain-dump-intake",
56
+ "prompt": "help me build all the suggestions in this chat: [brain dump about eng-buddy]",
57
+ "expected_output": "Path B structured extraction with repo context, confirmation gate, no research yet",
58
+ "files": ["brain-dump-input.md"],
59
+ "assertions": [
60
+ {"name": "decided_section", "description": "Contains a 'Decided' or 'Decisions already made' section"},
61
+ {"name": "open_questions_section", "description": "Contains an 'Open questions' or 'Gaps' section"},
62
+ {"name": "no_basic_questions", "description": "Does NOT ask basic 'what are you building?' questions already answered by the paste"},
63
+ {"name": "confirmation_gate", "description": "Ends with a confirmation question before proceeding to research"},
64
+ {"name": "no_research_sprint", "description": "Does NOT dispatch research agents or present suggestions on this turn"},
65
+ {"name": "hard_stop", "description": "Stops after asking for confirmation — does not proceed"}
66
+ ]
67
+ },
68
+ {
69
+ "id": 4,
70
+ "name": "brain-dump-turn2-research",
71
+ "prompt": "Yeah that looks right, go ahead and research it",
72
+ "expected_output": "First research sprint in brain dump mode: novelty map, suggestions with citations, challenges",
73
+ "files": ["brain-dump-input.md"],
74
+ "multi_turn_context": "Turn 2 of brain dump. Turn 1: user pasted eng-buddy brain dump, skill extracted structured summary, user now confirms.",
75
+ "assertions": [
76
+ {"name": "three_agents_dispatched", "description": "Dispatches 3 parallel research agents searching for specific brain dump claims"},
77
+ {"name": "novelty_map_present", "description": "Contains a Novelty Map table with solved/partially solved/novel verdicts"},
78
+ {"name": "real_citations", "description": "At least 5 unique URLs to real repos/articles/products"},
79
+ {"name": "brain_dump_claims_researched", "description": "Agents searched for specific architectural claims from the dump, not just broad topic searches"},
80
+ {"name": "challenge_present", "description": "At least one challenge/pushback raised"},
81
+ {"name": "ends_with_question", "description": "Ends with 1-2 targeted questions"},
82
+ {"name": "hard_stop", "description": "Does NOT proceed past questions"}
83
+ ]
84
+ },
85
+ {
86
+ "id": 5,
87
+ "name": "phase3-gate",
88
+ "prompt": "Ok I think I know what I want. Let's turn this into a plan.",
89
+ "expected_output": "Vision summary for approval, NOT the full plan yet",
90
+ "files": [],
91
+ "multi_turn_context": "Turn 6+ of brainstorming. Previous turns explored study-buddy app, settled on React Native + Firebase, matching algorithm, cold-start solution. User now wants to move to planning.",
92
+ "assertions": [
93
+ {"name": "vision_summary", "description": "Presents a brief 'here's what we've landed on' summary before generating the full plan"},
94
+ {"name": "approval_gate", "description": "Asks for explicit confirmation before proceeding to full plan generation"},
95
+ {"name": "does_not_dump_full_plan", "description": "Does NOT generate the entire task breakdown, agent assignments, and wave structure in this message"},
96
+ {"name": "references_plan_template", "description": "Reads or references references/plan-template.md for plan generation"}
97
+ ]
98
+ }
99
+ ]
100
+ }